/**IMPORT OTHER KITCHEN CATEGORIES DATA FROM AMAZON***/
PROC IMPORT 
DATAFILE ="C:\Users\F00456N\Documents\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\merged\kitchenall_amazon_4_7_20.csv"
OUT= amazon_kitchnequip3 DBMS=csv
REPLACE;
run;
    data WORK.AMAZON_KITCHNEQUIP3    ;
 %let _EFIERR_ = 0; /* set the ERROR detection macro variable */
 infile 'C:\Users\F00456N\Documents\Amazon Price Dynamics\New Data\Data Scraped\Home_Kitchen\merged\kitchenall_amazon_4_7_20.csv' delimiter = ',' MISSOVER DSD
lrecl=32767 firstobs=2 ;
        informat Time anydtdtm40. ;
        informat ASIN $10. ;
        informat Product_Description__same_as_the $135. ;
        informat Price nlnum32. ;
        informat Condition___New $4. ;
        informat Free_Shipping__Y_N_ $13. ;
        informat Buy_Box_Price nlnum32. ;
        informat Product_Star_Rating best32. ;
        informat __Customer_Reviews_on_Product $8. ;
        informat __Answered_Questions $30. ;
        informat In_Stock__Indicator_Y_N_NA_varia $4. ;
        informat Count_in_stock__if_available_ $3. ;
        informat Fulfilled_by_Amazon__Y_N_ $5. ;
        informat Amazon_Prime__Y_N_ $5. ;
        informat Sales_Rank_in_Own_Category $8. ;
        informat Sales_Rank_in_Sub__Category best32. ;
        informat Indicator_variable__Focal_Produc $1. ;
        informat Indicator_variable__Sponsored_Pr $1. ;
        informat Indicator_variable__Competitor_P $1. ;
        informat Indicator_variable__Similar_Item $1. ;
        informat Seller_ID $14. ;
        informat Seller_Rating best32. ;
        informat __Positive_Ratings best32. ;
        informat __Ratings $7. ;
        informat time1 mmddyy10. ;
        informat season best32. ;
        informat categ $15. ;
        informat brand $20. ;
        informat price1 best32. ;
        informat Buy_Box_Price1 best32. ;
        informat seller_rate best32. ;
        informat num_seller_Posrating best32. ;
        informat num_seller_ratings best32. ;
        informat Product_Star_Rate best32. ;
        informat Sales_Rank_Own_Categ best32. ;
        informat Sales_Rank_Sub_categ best32. ;
        informat Answered_Questions best32. ;
        informat Product_reviews best32. ;
        informat condition best32. ;
        informat Freeship best32. ;
        informat FBA best32. ;
        informat Prime best32. ;
        informat Instock best32. ;
        format Time datetime. ;
        format ASIN $10. ;
        format Product_Description__same_as_the $135. ;
        format Price nlnum12. ;
        format Condition___New $4. ;
        format Free_Shipping__Y_N_ $13. ;
        format Buy_Box_Price nlnum12. ;
        format Product_Star_Rating best12. ;
        format __Customer_Reviews_on_Product $8. ;
        format __Answered_Questions $30. ;
        format In_Stock__Indicator_Y_N_NA_varia $4. ;
        format Count_in_stock__if_available_ $3. ;
        format Fulfilled_by_Amazon__Y_N_ $5. ;
         format Amazon_Prime__Y_N_ $5. ;
         format Sales_Rank_in_Own_Category $8. ;
         format Sales_Rank_in_Sub__Category best12. ;
         format Indicator_variable__Focal_Produc $1. ;
         format Indicator_variable__Sponsored_Pr $1. ;
         format Indicator_variable__Competitor_P $1. ;
         format Indicator_variable__Similar_Item $1. ;
         format Seller_ID $14. ;
         format Seller_Rating best12. ;
         format __Positive_Ratings best12. ;
         format __Ratings $7. ;
         format time1 mmddyy10. ;
         format season best12. ;
         format categ $15. ;
         format brand $20. ;
         format price1 best32. ;
         format Buy_Box_Price1 best32. ;
         format seller_rate best32. ;
         format num_seller_Posrating best32. ;
         format num_seller_ratings best32. ;
         format Product_Star_Rate best32. ;
         format Sales_Rank_Own_Categ best32. ;
         format Sales_Rank_Sub_categ best32. ;
         format Answered_Questions best32. ;
         format Product_reviews best32. ;
         format condition best32. ;
         format Freeship best32. ;
         format FBA best32. ;
         format Prime best32. ;
         format Instock best32. ;
      input
                  Time
                  ASIN  $
                  Product_Description__same_as_the  $
                  Price
                  Condition___New  $
                  Free_Shipping__Y_N_  $
                  Buy_Box_Price
                  Product_Star_Rating
                  __Customer_Reviews_on_Product  $
                  __Answered_Questions  $
                  In_Stock__Indicator_Y_N_NA_varia  $
                  Count_in_stock__if_available_  $
                  Fulfilled_by_Amazon__Y_N_  $
                  Amazon_Prime__Y_N_  $
                  Sales_Rank_in_Own_Category  $
                  Sales_Rank_in_Sub__Category
                  Indicator_variable__Focal_Produc  $
                  Indicator_variable__Sponsored_Pr  $
                  Indicator_variable__Competitor_P  $
                  Indicator_variable__Similar_Item  $
                  Seller_ID  $
                  Seller_Rating
                  __Positive_Ratings
                  __Ratings  $
                 time1
                 season
                 categ  $
                 brand  $
                 price1
                 Buy_Box_Price1
                 seller_rate
                 num_seller_Posrating
                 num_seller_ratings
                 Product_Star_Rate
                 Sales_Rank_Own_Categ
                 Sales_Rank_Sub_categ
                 Answered_Questions
                 Product_reviews
                 condition
                 Freeship
                 FBA
                 Prime
                 Instock  ;
      if _ERROR_ then call symputx('_EFIERR_',1);  /* set ERROR detection macro variable */
     run;


PROC IMPORT 
DATAFILE ="C:\Users\F00456N\Documents\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\luggage_boardgamesdata_4_6_2020.csv"
OUT= amazon_luggageboardgames DBMS=csv
REPLACE;
run;


/**selecting only deep fryer and microwave; water piusopolyher data not available in the external sites and electric cooker is the focal category**/
data amazon_kitchnequip4 (keep = time1 ASIN seller_ID brand categ Answered_Questions price1 Buy_Box_Price1 Seller_Rating
num_seller_Posrating  num_seller_ratings Product_Star_Rate Sales_Rank_Own_Categ Sales_Rank_Sub_categ 
Product_reviews condition Freeship  FBA Prime Instock season);
set amazon_kitchnequip3;
where categ in ('Deep Fryer', 'Microwave');
if Buy_Box_Price1 = . then Nobuyboxwinner = 1;
else Nobuyboxwinner = 0;
run;
proc freq data = amazon_boardgames;
tables categ;
run;
PROC SORT DATA = amazon_kitchnequip4 NODUPKEY OUT= amazon_kitchnequip4;
BY TIME1 ASIN Seller_ID condition ;
RUN;
proc contents data = amazon_boardgames1;
run;

data amazon_boardgames1 (keep = time1 ASIN seller_ID brand categ Answered_Questions price1 Buy_Box_Price1 Seller_Rating
num_seller_Posrating  num_seller_ratings Product_Star_Rate Sales_Rank_Own_Categ Sales_Rank_Sub_categ 
Product_reviews condition Freeship  FBA Prime Instock season);
set amazon_luggageboardgames;
if Answered_Questions  = . then Answered_Questions  = 0;
run;
/*PROC SORT DATA = amazon_boardgames1 NODUPKEY OUT= amazon_boardgamesboardgames2;*/
/*BY TIME1 ASIN Seller_ID condition ;*/ /*already deduped in data processing*/
/*RUN;*/
/*stacking data sets*/
data amazon_allothers;
set amazon_kitchnequip4 amazon_boardgames1;
run;

/***3P SELLER CHARACTERISTICS***/

/****RESTRICTING DATA TO THE TIME LENGTH EXTERNAL SITE INFORMATION IS AVAILABLE***/

proc freq data = amazon_allothers1;
tables brand;
run;
data amazon_allothers1;
SET amazon_allothers;
where time1 >= '14Dec2017'd and time1 <= '06May2018'd ;
/*length brand1 $50.;*/
/*brand1 = brand;*/
RUN;
/**selecting board games categ for brand charac table**/
data boardgames;
set amazon_boardgames1;
where categ = 'Board Games';
run;

proc means data = boardgames n mean nway ;
class brand;
var Buy_Box_Price1 Answered_Questions Product_Star_Rate Product_reviews Sales_Rank_Own_Categ;
output out = summ_brand mean=;
run;
proc sql;
create table distinctsellers1 as
select 
brand,
count(distinct seller_ID) as num_sellers
from 
boardgames
where condition = 1 and seller_ID NE 'Amazon'
group by
brand;
quit;
proc print data = distinctsellers1;
run;

/****price change by ASIN***/

/**COMPUTING PRICE CHANGE BY ID, BRAND AND CATEG***/
data amazon_allothers11;
set amazon_allothers1;
where condition = 1;
run;
proc sort data = amazon_allothers11 NODUPkey out=amazon_allothers2 ;
by  ASIN seller_ID time1 ;
run;
proc expand data=amazon_allothers2 out=amazon_allothers3 method = none; 
  by ASIN seller_ID ;
  convert price1 = price_lag1     / transformout=(lag 1); 
RUN;

proc sql;
create table price_changes_amazonothers as
select 
time1,
ASIN,
categ,
brand,
seller_ID,
(case when price_lag1 ne . and price_lag1 ne 0 then (price1-price_lag1)/price_lag1 else 0 end) as pricechange

from
amazon_allothers3
group by
time1,
ASIN,
categ,
brand,
seller_ID;
quit;
/***count number of price changes by seller type and ASIN***/
proc sql;
create table numpricechange_amazonothers  as
select
ASIN,
categ,
brand,
seller_ID,
count(distinct time1) as num_days,
sum(case when pricechange ne 0 then 1 else 0 end) as num_pricechanges
from
price_changes_amazonothers
group by
ASIN,
categ,
brand,
seller_ID;
quit;
proc freq data = numpricechange_amazonothers;
tables num_pricechanges*brand /list;
where categ = 'Board Games' and seller_ID = 'Amazon';
run;
proc sort data = numpricechange_amazonothers;
by categ;
run;

/**selecting 3P sellers who have been on the marketplace > 10 days**/
DATA REDUCED_SELLERS;
SET numpricechange_amazonothers;
WHERE num_days > 10;
RUN;


proc sort data = REDUCED_SELLERS (keep = ASIN seller_ID num_pricechanges num_days) nodupkey out = reduced_seller_list;
by ASIN seller_ID;
run;
proc sort data = amazon_allothers1 out = amazon_allothers_sort;
by ASIN seller_ID;
run;
data amazon_allothers_finalsellers;
merge amazon_allothers_sort (in=a) reduced_seller_list (in=b);
by ASIN seller_ID;
if a and b;
run;

/**classifying used and new product sellers**/

data amazon_allothers_usednew;
set amazon_allothers_finalsellers;
length seller_type $50.;
IF CONDITION = 0 AND SELLER_ID = 'Amazon'  THEN SELLER_TYPE = 'Amazon Old';
ELSE IF CONDITION = 1 AND SELLER_ID = 'Amazon' THEN SELLER_TYPE = 'Amazon New';
ELSE IF CONDITION = 0 AND SELLER_ID NE 'Amazon' THEN SELLER_TYPE = '3P Old';
ELSE IF CONDITION = 1 AND SELLER_ID NE 'Amazon' THEN SELLER_TYPE = '3P New';
run;
proc freq data = amazon_allothers_usednew;
tables condition seller_type;
run;

proc means data = amazon_allothers_usednew n mean max min std ;
class categ seller_type;
var num_days num_pricechanges;
run;
proc print data = amazon_allothers_usednew (obs=10);
var seller_id;
where seller_type in ( '3P Old','3P New');
run;


/***DETERMINING SELLER CHARACTERISTICS FOR EACH SELLER***/

data amazon_allothers_new;
set amazon_allothers_usednew;
where seller_type = '3P New';
run;
proc freq data = amazon_allothers_usednew;
tables FBA*seller_type /list;
where categ = 'Board Games';
run;

proc sql;
create table SELLER_VARS as
select 
categ,
Seller_ID,
SELLER_TYPE,
count(ASIN) as num_ASIN_sold,
count(distinct ASIN) as num_uniq_ASIN_sold,
mean(condition) as mean_condition,
mean(freeship) as mean_freeship,
mean(FBA) as mean_FBA,
mean(Prime) as mean_prime,
MEAN(seller_rating) as MEAN_SELLER_RATE,
min(seller_rating) as Min_SELLER_RATE,
max(seller_rating) as Max_SELLER_RATE,
MAX(num_seller_Posrating) AS MAX_PERC_POSRATING,
Min(num_seller_Posrating) AS Min_PERC_POSRATING,
Mean(num_seller_Posrating) AS Mean_PERC_POSRATING,
MAX(num_seller_ratings) AS MAX_NUM_RATINGS,
Min(num_seller_ratings) AS Min_NUM_RATINGS,
Mean(num_seller_ratings) AS Mean_NUM_RATINGS,
MEAN(price1) as MEAN_PRICE,
Max(price1) as Max_PRICE,
Min(price1) as Min_PRICE
from 
amazon_allothers_new
group by
categ,
Seller_ID,
SELLER_TYPE
;
quit;

PROC EXPORT 
DATA=SELLER_VARS
DBMS=csv
OUTFILE= "C:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\3psellercharac_4_15_20.csv"
REPLACE;
run;
proc contents data = SELLER_VARS;
run;
proc means data = SELLER_VARS n mean max min std nway;
class categ;
var 
mean_FBA
mean_freeship
MEAN_SELLER_RATE
MAX_NUM_RATINGS
MAX_PERC_POSRATING
num_uniq_ASIN_sold;
output out = summ_sellercharc ;
run;

proc standard data = SELLER_VARS  mean=0 std=1 out= SELLER_standvars_othercateg;
VAR 
MAX_NUM_RATINGS
MAX_PERC_POSRATING
MEAN_PRICE
MEAN_SELLER_RATE
Max_PRICE
Max_SELLER_RATE
Mean_NUM_RATINGS
Mean_PERC_POSRATING
Min_NUM_RATINGS
Min_PERC_POSRATING
Min_PRICE
Min_SELLER_RATE
mean_FBA
mean_prime
mean_condition
mean_freeship
num_ASIN_sold
num_uniq_ASIN_sold
;
by categ;
RUN;
/*PROC PRINT DATA = SELLER_standvars_Cooker (obs=20);*/
/*RUN;*/

/**RUNNING VARCLUS PROCEDURE TO REMOVE CORRELATED VARIABLES**/


/**RUNNING FASTCLUS PROCEDURE for each category***/
proc freq data = SELLER_standvars_othercateg;
tables categ;
run;
data boardgames_clusteranalysis;
set SELLER_standvars_othercateg;
where categ = 'Board Games';
run;
proc varclus data=boardgames_clusteranalysis    outtree=tree centroid MAXITER = 1000 maxclusters= 5  CENTROID;
var 
Mean_NUM_RATINGS
Mean_PERC_POSRATING
mean_FBA
mean_prime
mean_freeship
num_ASIN_sold
num_uniq_ASIN_sold
;
run;

proc fastclus data = boardgames_clusteranalysis
 outseed= mean1
 maxc= 10 converge = 0 maxiter=100000 summary;
 var 
Max_NUM_RATINGS
Mean_PERC_POSRATING
mean_FBA
/*mean_prime*/
mean_freeship
/*num_ASIN_sold*/
num_uniq_ASIN_sold
;
run;
data seed;
set mean1;
if _freq_>10;
run;
proc fastclus data=boardgames_clusteranalysis seed=seed maxc=10 least=1 out=sellers_clusters_boardgames;
 var 
Max_NUM_RATINGS
Mean_PERC_POSRATING
mean_FBA
/*mean_prime*/
mean_freeship
/*num_ASIN_sold*/
num_uniq_ASIN_sold
;
run;

PROC EXPORT 
DATA=sellers_clusters_boardgames
DBMS=csv
OUTFILE= "C:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\clusteroutput_boardgames_5_5_20.csv"
REPLACE;
run;

/*PROC FREQ DATA = sellers_clusters_boardgames;*/
/*TABLES CLUSTER;*/
/*RUN;*/

/***MERGING CLUSTERS BACK TO THE UN-STANDARDIZED DATA SET TO RECOVER MEAN CLUSTER CHARACTERISTICS****/

PROC SORT DATA = sellers_clusters_boardgames (KEEP= Seller_ID Cluster ) OUT=sellers_clusters_boardgames_SORT;
BY Seller_ID;
RUN;
DATA SELLER_VARS_boardgames;
SET SELLER_VARS;
WHERE CATEG = 'Board Games';
RUN;

PROC SORT DATA = SELLER_VARS_boardgames out=SELLER_VARS_boardgames_sort;
BY Seller_ID;
RUN;

data seller_boardgames_clusters (drop= cluster);
merge sellers_clusters_boardgames_SORT (in=a) SELLER_VARS_boardgames_sort (in=b);
BY Seller_ID;
if a and b;
IF CLUSTER = 1 THEN seller_type = 'Cluster 1';
if CLUSTER = 2 THEN seller_type = 'Cluster 2';
if CLUSTER = 3 THEN seller_type = 'Cluster 3';
if CLUSTER = 4 THEN seller_type = 'Cluster 4';
run;
PROC CONTENTS DATA = numpricechange_amazonothers;
RUN;
proc means data = seller_boardgames_clusters n mean min max std nway ;
class seller_type;
var
MAX_NUM_RATINGS
MAX_PERC_POSRATING
MEAN_SELLER_RATE
Max_SELLER_RATE
Mean_NUM_RATINGS
Mean_PERC_POSRATING
Min_NUM_RATINGS
Min_PERC_POSRATING
Min_PRICE
Min_SELLER_RATE
mean_FBA
mean_prime
mean_condition
mean_freeship
num_ASIN_sold
num_uniq_ASIN_sold;
output out= seller_boardgames_cluster_charac ;
run;

PROC EXPORT 
DATA=seller_boardgames_cluster_charac
DBMS=csv
OUTFILE= "C:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\clusterPROFILEs_boardgames_5_5.csv"
REPLACE;
run;
/***MERGING CLUSTER OUTPUT WITH THE NUMBER OF PRICE CHANGES AND NUMBER OF DAYS TREND***/
data numpricechange_boardgames;
set numpricechange_amazonothers;
where categ = 'Board Games';
run;

PROC SORT DATA = numpricechange_boardgames OUT= SORTED_PRICECHANGES;
BY SELLER_ID;
RUN;
PROC SORT DATA = seller_boardgames_clusters out= sorted_sellerclusters;
by seller_id;
run;
DATA SELLER_PRICECHANGE_boardgames;
MERGE SORTED_PRICECHANGES (IN=A) sorted_sellerclusters (IN=B);
BY SELLER_ID;
IF A AND B;
RUN;
/***above file exported as PRICECHANGES_SELLERCLUSTER_boardgames_4_24.csv****/

/******MERGING CLUSTER OUTPUT WITH THE DAILY PRICE TREND DATA AT ASIN LEVEL***/
/**/
/*proc freq data = amazon_allothers_usednew;*/
/*tables seller_type;*/
/*run;*/
/*proc contents data = amazon_allothers_usednew;*/
/*run;*/
data boardgames_usednew;
set amazon_allothers_usednew;
where categ = 'Board Games';
run;

proc sql;
create table price_dev_BB as
select 
time1,
ASIN,
Seller_ID,
(price1-Buy_Box_Price1)as dev_BBprice

from 
boardgames_usednew
group by
time1,
ASIN,
Seller_ID
;
quit;
/***ADDED VARIABLES AFTER CONVO WITH BRYAN --- NEED TO INCLUDE THESE IN THE SELLER ENTRY AND BUY BOX MODELS***/
/**SELLER ENTRY: NON BUY BOX 3P PRICE; BUY BOX MODEL: DIFFERENCE FROM LOWEST PRICE OFFER***/
proc sql;
   create table lowestprice_offer as
   select 
      time1,
      ASIN,
/*      Buy_Box_Price1,*/
      min(price1) as lowestprice_offer
   from boardgames_usednew
   group by time1, ASIN;
quit;
proc print data = lowestprice_offer (obs=20);
run;

data non_buybox3pprice;
set boardgames_usednew;
where seller_id ne 'Amazon' and price1 ne Buy_Box_Price1;
run;
proc sql;
   create table mean_nonbuybox3pprice as
   select 
      time1,
      ASIN,
      mean(price1) as nonbuybox3p_price
   from non_buybox3pprice
   group by time1, ASIN;
quit;

/***adding the mean non buy box price for the seller entry model*****/
proc sort data = mean_nonbuybox3pprice;
by time1 ASIN;
run;

proc sort data = boardgames_usednew out = focal_rival_prods26_sort;
by time1 ASIN;
run;
proc freq data = boardgames_usednew;
tables asin*brand / list;
run;

data focal_rival_prods27;
merge focal_rival_prods26_sort (in=a) mean_nonbuybox3pprice (in=b) ;
by time1 ASIN;
if a and b ;
run;
proc means data = focal_rival_prods27 n sum nway noprint;
class time1 ASIN seller_id seller_type;
var price1 Buy_Box_Price1 nonbuybox3p_price Seller_Rating num_seller_Posrating  num_seller_ratings condition Freeship  FBA Prime Instock ;
output out = price_trend sum= ;
run;
PROC SORT DATA = price_trend OUT= price_trend1;
BY SELLER_ID ;
RUN;
data price_trend2 (drop=seller_type);
set price_trend1;
where seller_type = '3P New';
run;
data price_trend3;
set price_trend1;
where seller_type ne '3P New';
run;

PROC SORT DATA = seller_boardgames_clusters (KEEP= SELLER_ID seller_type) OUT= SELLER_TYPE1;
BY SELLER_ID ;
RUN;

DATA newSELLER_TYPE_TREND ;
MERGE SELLER_TYPE1 (IN=A) price_trend2 (IN=B);
BY SELLER_ID ;
IF B;
RUN;
/*proc print data = newSELLER_TYPE_TREND (obs=20);*/
/*run; */
data SELLER_TYPE_TREND;
set newSELLER_TYPE_TREND price_trend3;
run;

PROC SORT DATA = SELLER_TYPE_TREND;
BY TIME1 ASIN;
RUN;

DATA SELLER_TYPE_TREND1;
MERGE SELLER_TYPE_TREND (IN=A) lowestprice_offer (IN=B);
BY TIME1 ASIN;
IF A ;
RUN;
/****EXTRA SUMMARY NEEDED FOR DEVIATION FROM BUY BOX PRICE - THIS VARIABLE IS NOT INCLUDED IN THE FINALDATA USED FOR MODELING**/
proc sort data = price_dev_BB;
by time1 asin seller_id;
run;
proc sort data = SELLER_TYPE_TREND1;
by time1 asin seller_id;
run;

data devbuybox_bgames;
merge seller_type_trend1 (in=a) price_dev_bb (in=b);
by time1 asin seller_id;
if a and b;
run;
proc print data = devbuybox_bgames (obs=10);
/*var nonbuybox3p_price;*/
/*where ASIN = 'B00NQ7QFGM';*/
run;
PROC MEANS DATA = devbuybox_bgames N SUM MEAN MAX MIN NWAY;
CLASS SELLER_TYPE;
VAR dev_BBprice;
OUTPUT OUT = DEV_SUMMARY;
RUN;

/*****************************************/
proc print data = SELLER_TYPE_TREND1 (obs=20);
run;
/*proc freq data = SELLER_TYPE_TREND;*/
/*tables seller_type;*/
/*run;*/

/*PROC EXPORT */
/*DATA=SELLER_TYPE_TREND*/
/*DBMS=csv*/
/*OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\Sellerclusters_boardgames_May2020.csv"*/
/*REPLACE;*/
/*run;*/
PROC EXPORT 
DATA=SELLER_TYPE_TREND1
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\Sellerclusters_boardgames_July2025.csv"
REPLACE;
run;

/**COMPUTING PRICE CHANGE BY ASIN AND SELLER TYPE***/

proc sort data = SELLER_TYPE_TREND1 NODUPREC out=SELLER_TYPE_TREND11 ;
by  ASIN seller_type seller_ID TIME1 ;
run;
proc expand data=SELLER_TYPE_TREND11 out=SELLER_TYPE_TREND12 method = none; 
  by ASIN seller_type seller_ID;
  convert price1 = price_lag1     / transformout=(lag 1); 
RUN;

proc sql;
create table price_changes_sellertype as
select 
time1,
ASIN,
seller_type,
seller_ID,
price1,
(price1 - lowestprice_offer) as pricediff_fromlowest,
(case when price_lag1 ne . and price_lag1 ne 0 then (price1-price_lag1)/price_lag1 else 0 end) as pricechange
from
SELLER_TYPE_TREND12
group by
time1,
ASIN,
seller_type,
seller_ID;
quit;
/*PROC EXPORT */
/*DATA=price_changes_sellertype*/
/*DBMS=csv*/
/*OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\price_changes_boardgames_May2020.csv"*/
/*REPLACE;*/
/*run;*/
PROC EXPORT 
DATA=price_changes_sellertype
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\price_changes_boardgames_July2025.csv"
REPLACE;
run;
/***count number of price changes by seller type and ASIN***/

proc sql;
create table num_pricechange_sellertype_asin as
select
seller_ID, 
seller_type,
ASIN,
count(distinct time1) as num_days,
sum(case when pricechange ne 0 then 1 else 0 end) as num_pricechanges
from
price_changes_sellertype
group by
seller_ID, 
seller_type,
ASIN;
quit;
/*PROC EXPORT */
/*DATA=num_pricechange_sellertype_asin*/
/*DBMS=csv*/
/*OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\num_pricechange_sellertype_asin_boardgames_May2020.csv"*/
/*REPLACE;*/
/*run;*/
PROC EXPORT 
DATA=num_pricechange_sellertype_asin
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\num_pricechange_sellertype_asin_boardgames_July2025.csv"
REPLACE;
run;

/***selecting the representative seller and ASIN per cluster with most number of price changes in the period***/

proc sort data = num_pricechange_sellertype_asin out= sorted_bymostchanges ;
by seller_type descending num_pricechanges  descending num_days ASIN;
run;

/*PROC EXPORT */
/*DATA=sorted_bymostchanges*/
/*DBMS=csv*/
/*OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\sorted_bymostchanges_boardgames_May2020.csv"*/
/*REPLACE;*/
/*run;*/

PROC EXPORT 
DATA=sorted_bymostchanges
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\sorted_bymostchanges_boardgames_July2025.csv"
REPLACE;
run;
data cluster1seller;
set sorted_bymostchanges;
where seller_type  = 'Cluster 1' and ASIN in ( 'B00UD6EXIQ', 'B018RFIVYG') ;
run;
proc sort data = cluster1seller nodupkey out= cluster1_pricechange_vector;
by seller_type ASIN;
run;
proc print data = cluster1_pricechange_vector;
run;
data cluster2seller;
set sorted_bymostchanges;
where seller_type = 'Cluster 2' and ASIN = 'B00UD6EXIQ' ;
run;
proc sort data = cluster2seller nodupkey out= cluster2_pricechange_vector;
by seller_type ;
run;
data cluster3sellers;
set sorted_bymostchanges;
where seller_type = 'Cluster 3' and ASIN in ( 'B00UB25IJA' ) ;
run;
proc sort data = cluster3sellers nodupkey out= cluster3_pricechange_vector;
by seller_type ASIN;
run;
data cluster4sellers;
set sorted_bymostchanges;
where seller_type = 'Cluster 4' and ASIN in ( 'B00UB25IJA') ;
run;
proc sort data = cluster4sellers nodupkey out= cluster4_pricechange_vector;
by seller_type ASIN;
run;
data amazon;
set sorted_bymostchanges;
where seller_type in ('Amazon New') and ASIN IN ( 'B00UD6EXIQ', 'B00UB25IJA' , 'B018RFIVYG');
run;

data newandamazonsellers;
set cluster1_pricechange_vector cluster2_pricechange_vector cluster3_pricechange_vector cluster4_pricechange_vector amazon;
run;
proc freq data = newandamazonsellers;
tables seller_ID*seller_type*ASIN /list;
run;
/**merging back to the price changes main file to get the trend for the chosen seller IDs and ASINs**/

/*Amazon New   B00UD6EXIQ USAopoly - USO1**/
/*Amazon New	B00UB25IJA	USAopoly - USO2*/
/*Amazon New	B018RFIVYG	Fantasy Flight*/
/*Cluster 1	B00UD6EXIQ	USAopoly - USO1*/
/*Cluster 1 B018RFIVYG Fantasy Flight **/
/*Cluster 2	B00UD6EXIQ	USAopoly - USO1*/
/*Cluster 3	B00UB25IJA	USAopoly - USO2*/
/*Cluster 4	B00UB25IJA	USAopoly - USO2*/

proc sort data = price_changes_sellertype out= price_changes_sellertype1;
by seller_type seller_ID ASIN;
run;
proc sort data = newandamazonsellers out= newandamazonsellers1;
by seller_type seller_ID ASIN;
run;
data pricechangetrend_outcome;
merge newandamazonsellers1 (in=a) price_changes_sellertype1 (in=b);
by seller_type seller_ID ASIN;
if a and b;
run;
proc sort data = pricechangetrend_outcome (keep=time1) nodupkey out= final_outputvector;
by time1;
run;
%macro pricechange(a,b,c);
data pricechange&b. (keep = time1 pricechange_&b. price&b. pricediff_lowst_&b.);
set pricechangetrend_outcome;
where seller_type = &a. and ASIN = &c.;
rename pricechange = pricechange_&b. ;
rename price1 = price&b.;
rename pricediff_fromlowest = pricediff_lowst_&b.;
run;

data final_outputvector ;
merge final_outputvector (in=a) pricechange&b. (in=b);
by time1;
if a ;
if a and not b then pricechange_&b. = 0;
run;
%mend;
%pricechange(a='Amazon New', b = Amzn_uso1, c = 'B00UD6EXIQ'); 
%pricechange(a='Amazon New', b = Amzn_uso2, c = 'B00UB25IJA'); 
%pricechange(a='Amazon New', b = Amzn_fflite, c = 'B018RFIVYG'); 
%pricechange(a='Cluster 1', b = clust1_uso1, c = 'B00UD6EXIQ'); 
%pricechange(a='Cluster 1', b = clust1_fflite, c = 'B018RFIVYG');
%pricechange(a='Cluster 2', b = clust2_uso1, c = 'B00UD6EXIQ'); 
%pricechange(a='Cluster 3', b = clust3_uso2, c = 'B00UB25IJA'); 
%pricechange(a='Cluster 4', b = clust4_uso2, c = 'B00UB25IJA'); 
proc print data = final_outputvector (obs=20);
run;
/**/
/*PROC EXPORT */
/*DATA=final_outputvector*/
/*DBMS=csv*/
/*OUTFILE= "C:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\pricechangetrend_boardgames_May2020.csv"*/
/*REPLACE;*/
/*run;*/
PROC EXPORT 
DATA=final_outputvector
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\pricechangetrend_boardgames_July2025.csv"
REPLACE;
run;

/***CLUSTER MEMBERSHIP: NUMBER OF SELLERS IN A CLUSTER AT TIME T, BRANDS SOLD BY SELLER TYPES AT TIME T,
NUMBER OF PRICE CHANGES BY OWN AND CROSS BRANDS BY SELLER TYPES AT TIME (T-1), (T-2), (T-3), *******/
/**OUTCOME VECTOR*/
/*Amazon New   B00UD6EXIQ USAopoly - USO1**/
/*Amazon New	B00UB25IJA	USAopoly - USO2*/
/*Amazon New	B018RFIVYG	Fantasy Flight*/
/*Cluster 1	B00UD6EXIQ	USAopoly - USO1*/
/*Cluster 1 B018RFIVYG Fantasy Flight **/
/*Cluster 2	B00UD6EXIQ	USAopoly - USO1*/
/*Cluster 3	B00UB25IJA	USAopoly - USO2*/
/*Cluster 4	B00UB25IJA	USAopoly - USO2*/

/***full list of ASINs and brand**/
/*1589947207	Fantasy Flight*/
/*1616615885	Fantasy Flight*/
/*B00DLX51HK	4D Cityscape*/
/*B00UB25IJA	USAopoly*/
/*B00UD6EXIQ	USAopoly*/
/*B018RFIVYG	Fantasy Flight*/
/*B01F00U4VY	USAopoly*/
/*B01JA8B18M	Fantasy Flight*/



PROC SQL;
CREATE TABLE SELLER_offercharcs AS
SELECT
TIME1,
SELLER_TYPE,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS,
COUNT(DISTINCT ASIN) AS NUM_SKUS,
sum(case when ASIN in ('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M') then 1 else 0 end) AS NUM_fflite, /**number of offers on each brand**/
sum(case when ASIN = 'B00DLX51HK' then 1 else 0 end) as num_4dcity, 
sum(case when ASIN  in ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY') then 1 else 0 end) as num_usopoly
from 
SELLER_TYPE_TREND12
group by
time1,
seller_type;
quit;

proc sort data = SELLER_offercharcs (keep = time1) nodupkey out= SELLER_offercharcs_base;
by time1;
run;

%macro seller_offercharacs(a,b);
data seller_offer&a.;
set SELLER_offercharcs;
where seller_type = &b.;
rename NUM_SELLERS = NUM_SELLERS&a.;
rename NUM_SKUS = NUM_SKUS&a.;
rename NUM_fflite = NUM_fflite&a.;
rename num_4dcity = num_4dcity&a.;
rename num_usopoly = num_usopoly&a.;
run;

proc sort data = seller_offer&a. (drop=  seller_type) out= sorted_seller_type;
by time1;
run;

data SELLER_offercharcs_base;
merge SELLER_offercharcs_base (in=a) sorted_seller_type (in=b);
by time1;
if a ;
if a and not b then do;
NUM_SELLERS&a. = 0;
NUM_SKUS&a. = 0;
NUM_fflite&a. = 0;
num_4dcity&a.= 0;
num_usopoly&a.= 0;
end;
run;
%mend;
%seller_offercharacs(a = used3p, b = '3P Old');
%seller_offercharacs(a = usedamzn, b = 'Amazon Old'); 
%seller_offercharacs(a = amzn, b = 'Amazon New');
%seller_offercharacs(a = clust1, b = 'Cluster 1'); 
%seller_offercharacs(a = clust2, b = 'Cluster 2'); 
%seller_offercharacs(a = clust3, b = 'Cluster 3'); 
%seller_offercharacs(a = clust4, b = 'Cluster 4'); 


/*****COMPETITOR PRICING INFORMATION*****/
/**ACCOUNTING FOR PRICE CHANGES IN EACH SELLER TYPE BY BRANDS****/

proc sql;
create table num_pricechange_sellertype_asin as
select
TIME1,
seller_type,
ASIN,
sum(case when pricechange ne 0 then 1 else 0 end) as num_pricechange
from
price_changes_sellertype
group by
time1,
seller_type,
ASIN;
quit;


proc sort data = num_pricechange_sellertype_asin (keep = time1)nodupkey out= numpricechange_sellerbrandtrend;
by time1;
run;
/**OUTCOME VECTOR*/
/*Amazon New   B00UD6EXIQ USAopoly - USO1**/
/*Amazon New	B00UB25IJA	USAopoly - USO2*/
/*Amazon New	B018RFIVYG	Fantasy Flight*/
/*Cluster 1	B00UD6EXIQ	USAopoly - USO1*/
/*Cluster 1 B018RFIVYG Fantasy Flight **/
/*Cluster 2	B00UD6EXIQ	USAopoly - USO1*/
/*Cluster 3	B00UB25IJA	USAopoly - USO2*/
/*Cluster 4	B00UB25IJA	USAopoly - USO2*/

/***full list of ASINs and brand**/
/*1589947207	Fantasy Flight*/
/*1616615885	Fantasy Flight*/
/*B00DLX51HK	4D Cityscape*/
/*B00UB25IJA	USAopoly*/
/*B00UD6EXIQ	USAopoly*/
/*B018RFIVYG	Fantasy Flight*/
/*B01F00U4VY	USAopoly*/
/*B01JA8B18M	Fantasy Flight*/

%macro seller_brand(a,b,c,d);
data num_pricechange_&d.&b.;
set num_pricechange_sellertype_asin;
where ASIN in &c. and seller_type = &a.;
rename num_pricechange = num_pricechange&b.&d.;
run;

proc sql;
create table num_pricechange1_&d.&b. as
select 
time1,
sum(num_pricechange&b.&d.) as num_pricechange1&b.&d.
from 
num_pricechange_&d.&b.
group by
time1;
quit;

proc sort data = num_pricechange1_&d.&b. NODUPKEY out= sorted;
by time1;
run;

data numpricechange_sellerbrandtrend;
merge numpricechange_sellerbrandtrend (in=a) sorted (in=b);
by time1;
if a ;
if a and not b then num_pricechange1&b.&d. = 0;
run;
%mend;

%seller_brand(a='3P Old',b= fflite, c =('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M'), d= used3p);
%seller_brand(a='Amazon Old',b= fflite, c=('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M'), d= usedamzn);
%seller_brand(a='Amazon New',b= fflite, c=('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M'), d= amzn);
%seller_brand(a='Cluster 1',b= fflite, c=('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M'), d= clust1);
%seller_brand(a='Cluster 2',b= fflite, c=('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M'), d= clust2);
%seller_brand(a='Cluster 3',b= fflite, c=('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M'), d= clust3);
%seller_brand(a='Cluster 4',b= fflite, c=('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M'), d= clust4);

%seller_brand(a='3P Old',b= usopoly, c = ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY'), d= used3p);
%seller_brand(a='Amazon Old',b= usopoly, c = ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY'), d= usedamzn);
%seller_brand(a='Amazon New',b= usopoly, c = ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY'), d= amzn);
%seller_brand(a='Cluster 1',b= usopoly, c = ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY'), d= clust1);
%seller_brand(a='Cluster 2',b= usopoly, c = ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY'), d= clust2);
%seller_brand(a='Cluster 3',b= usopoly, c = ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY'), d= clust3);
%seller_brand(a='Cluster 4',b= usopoly, c = ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY'), d= clust4);

%seller_brand(a='3P Old',b= 4dcity, c =('B00DLX51HK'), d= used3p);
%seller_brand(a='Amazon Old',b= 4dcity, c=('B00DLX51HK'), d= usedamzn);
%seller_brand(a='Amazon New',b= 4dcity, c=('B00DLX51HK'), d= amzn);
%seller_brand(a='Cluster 1',b= 4dcity, c=('B00DLX51HK'), d= clust1);
%seller_brand(a='Cluster 2',b= 4dcity, c=('B00DLX51HK'), d= clust2);
%seller_brand(a='Cluster 3',b= 4dcity, c=('B00DLX51HK'), d= clust3);
%seller_brand(a='Cluster 4',b= 4dcity, c=('B00DLX51HK'), d= clust4);


/****************************/
/*****SELLER ATTRIBUTES******/
/****************************/
/***EXTRACTING NUM OF UNIQUE ASINS AND NUM OF ASINS SOLD CONDITION FOR EACH SELLER***/
PROC SQL;
create table seller_asin as
select
time1,
seller_ID,
count(distinct ASIN) as unique_ASIN_sold,
count(distinct time1) as sell_freq
from
SELLER_TYPE_TREND12
group by
time1,
seller_ID
;
quit;
/**MERGING BACK TO THE MAIN DATA SET**/
PROC SORT DATA = SELLER_TYPE_TREND12 OUT= SELLER_TYPE_TREND4;
BY time1 seller_ID;
RUN;
PROC SORT DATA = seller_asin OUT= seller_asin1;
BY time1 seller_ID;
RUN;
DATA SELLER_TYPE_TREND5;
MERGE SELLER_TYPE_TREND4 (IN=A) seller_asin1 (IN=B);
BY time1 seller_ID;
IF A AND B ;
RUN;

/**creating cumulative sell freq over time***/
proc sort data = SELLER_TYPE_TREND5 (keep= time1 seller_ID sell_freq) nodupkey out=SELLER_TYPE_TREND6;
by seller_ID time1;
run;

data cum_sellfreq;
  set SELLER_TYPE_TREND6;
  by seller_ID ;
  if first.seller_ID then cum_sellerfreq = 0;
  cum_sellerfreq +sell_freq;
run;
/**merging back to main data set**/
PROC SORT DATA = SELLER_TYPE_TREND5 OUT= SELLER_TYPE_TREND7;
BY time1 seller_ID;
RUN;
PROC SORT DATA = cum_sellfreq OUT= cum_sellfreq1;
BY time1 seller_ID;
RUN;
DATA SELLER_TYPE_TREND8;
MERGE SELLER_TYPE_TREND7 (IN=A) cum_sellfreq1 (IN=B);
BY time1 seller_ID;
IF A AND B ;
RUN;

PROC SQL;
create table SELLER_CHAR as
select
time1,
seller_type,
SUM(FBA) AS NUM_FBA,
sum(prime) as num_prime,
SUM(FREESHIP) AS NUM_FREESHIP,
SUM(CONDITION) AS NUM_NEWOFFERS,
sum(case when condition = 0 then 1 else 0 end) AS NUM_USEDOFFERS,
max(num_seller_Posrating) as max_perct_sellerrating,
mean(num_seller_Posrating) as mean_perct_sellerrating,
min(num_seller_Posrating) as min_perct_sellerrating,
max(num_seller_ratings) as max_num_seller_rating,
mean(num_seller_ratings) as mean_num_seller_rating,
min(num_seller_ratings) as min_num_seller_rating,
max(seller_rating) as max_seller_star_rating,
mean(seller_rating) as mean_seller_star_rating,
min(seller_rating) as min_seller_star_rating,
max(unique_ASIN_sold) as max_uniq_ASIN,
mean(unique_ASIN_sold) as mean_uniq_ASIN,
min(unique_ASIN_sold) as min_uniq_ASIN,
max(cum_sellerfreq) as max_cumsalefreq,
mean(cum_sellerfreq) as mean_cumsalefreq,
min(cum_sellerfreq) as min_cumsalefreq
from
SELLER_TYPE_TREND8
group by
time1,
seller_type
;
quit;

/**rename each seller char to individual columns for each seller cluster for model build, i.e., max_fba_clust1, min_fba_clust1 eusopoly. ****/

proc sort data = SELLER_CHAR  (keep=time1) nodupkey out=seller_char_sorted;
by time1;
run;
data sellerchar_amzn;
set SELLER_CHAR;
where seller_type = 'Amazon New';
rename num_Freeship = Freeship_amzn;
rename num_prime = prime_amzn;
rename max_cumsalefreq = cumsalefreq_amzn;
run;
data sellerchar_usedamzn;
set SELLER_CHAR;
where seller_type = 'Amazon Old';
rename num_Freeship = Freeship_usedamzn;
run;
data sellerchar_used3p;
set SELLER_CHAR;
where seller_type = '3P Old';
rename num_FBA = FBA_used3p;
rename num_prime = prime_used3p;
rename num_Freeship = Freeship_used3p;
run;
proc sort data = sellerchar_amzn (keep = time1 Freeship_amzn prime_amzn cumsalefreq_amzn) out= sellerchar_amzn1;
by time1;
run;

proc sort data = sellerchar_usedamzn (keep = time1 Freeship_usedamzn  ) out= sellerchar_amznused1;
by time1;
run;
proc sort data = sellerchar_used3p (keep = time1 FBA_used3p Freeship_used3p prime_used3p ) out= sellerchar_used3p1;
by time1;
run;
data seller_char_sorted ;
merge seller_char_sorted (in=a) sellerchar_amzn1 (in=b) sellerchar_amznused1 (in =c) sellerchar_used3p1 (in=d);
by time1;
if a ;
if  a and not b then do;
Freeship_amzn = 0;
cumsalefreq_amzn = 0;
prime_amzn = 0;
end;
if a and not c then do;
Freeship_usedamzn = 0;
end;
if a and not d then do;
FBA_used3p = 0;
Freeship_used3p = 0;
prime_used3p = 0;
end;
run;

%macro sellerchar(a,b);
data sellerchar_&b.;
set SELLER_CHAR;
where seller_type = &a.;
rename num_FBA = num_FBA_&b.;
rename num_prime = num_prime_&b.;
rename num_Freeship = num_Freeship_&b.;
rename max_perct_sellerrating = max_perct_sellerrating_&b.;
rename mean_perct_sellerrating = mean_perct_sellerrating_&b.;
rename min_perct_sellerrating = min_perct_sellerrating_&b.;
rename max_num_seller_rating = max_num_seller_rating_&b.;
rename mean_num_seller_rating = mean_num_seller_rating_&b.;
rename min_num_seller_rating = min_num_seller_rating_&b.;
rename max_seller_star_rating = max_seller_star_rating_&b.;
rename mean_seller_star_rating = mean_seller_star_rating_&b.;
rename min_seller_star_rating = min_seller_star_rating_&b.;
rename max_uniq_ASIN = max_uniq_ASIN_&b.;
rename mean_uniq_ASIN = mean_uniq_ASIN_&b.;
rename min_uniq_ASIN = min_uniq_ASIN_&b.;
rename max_cumsalefreq = max_cumsalefreq_&b.;
rename mean_cumsalefreq = mean_cumsalefreq_&b.;
rename min_cumsalefreq = min_cumsalefreq_&b.;
run;

proc sort data = sellerchar_&b. (keep = time1 num_FBA_&b. num_prime_&b. num_Freeship_&b. max_perct_sellerrating_&b. 
mean_perct_sellerrating_&b. min_perct_sellerrating_&b. max_num_seller_rating_&b. mean_num_seller_rating_&b.
min_num_seller_rating_&b. max_seller_star_rating_&b. mean_seller_star_rating_&b. min_seller_star_rating_&b. 
max_uniq_ASIN_&b. mean_uniq_ASIN_&b. min_uniq_ASIN_&b. max_cumsalefreq_&b. mean_cumsalefreq_&b. 
min_cumsalefreq_&b.) out= sellerchar_sorted&b.;
by time1;
run;

data seller_char_sorted ;
merge seller_char_sorted (in=a) sellerchar_sorted&b. (in=b);
by time1;
if a ;
if a and not b then do;
num_FBA_&b. = 0;
num_prime_&b.=0;
num_Freeship_&b. = 0;
max_perct_sellerrating_&b. = 0;
mean_perct_sellerrating_&b. = 0;
min_perct_sellerrating_&b. = 0;
max_num_seller_rating_&b. = 0;
mean_num_seller_rating_&b. = 0;
min_num_seller_rating_&b. = 0;
max_seller_star_rating_&b. = 0;
mean_seller_star_rating_&b. = 0;
min_seller_star_rating_&b. = 0;
max_uniq_ASIN_&b. = 0;
mean_uniq_ASIN_&b. = 0;
min_uniq_ASIN_&b. = 0;
max_cumsalefreq_&b. = 0;
mean_cumsalefreq_&b. = 0;
min_cumsalefreq_&b. = 0;
end;
run;
%mend;
%sellerchar(a='Cluster 1', b = clust1); 
%sellerchar(a='Cluster 2', b = clust2); 
%sellerchar(a='Cluster 3', b = clust3); 
%sellerchar(a='Cluster 4', b = clust4); 

/***********************************/
/*************BRAND ATTRIBUTES*****/
/*********************************/
/**SUMMARIZING PRODUCT CHAR FOR fflite***/
proc contents data = focal_rival_prods27;
run;

PROC SQL;
create table PRODUCT_CHAR_fflite as
select
time1,
MAX(Answered_Questions) as max_ans_questions_fflite,
mean(Answered_Questions) as mean_ans_questions_fflite,
min(Answered_Questions) as min_ans_questions_fflite,
max(Buy_Box_Price1) as max_buyboxprice_fflite,
mean(Buy_Box_Price1) as mean_buyboxprice_fflite,
min(Buy_Box_Price1) as min_buyboxprice_fflite,
max(nonbuybox3p_price) as max_nonbbox3pprice_fflite,
mean(nonbuybox3p_price) as mean_nonbbox3pprice_fflite,
min(nonbuybox3p_price) as min_nonbbox3pprice_fflite,
max(Product_Star_Rate) as max_product_star_rate_fflite,
mean(Product_Star_Rate) as mean_product_star_rate_fflite,
min(Product_Star_Rate) as min_product_star_rate_fflite,
max(Product_reviews) as max_Product_reviews_fflite,
mean(Product_reviews) as mean_Product_reviews_fflite,
min(Product_reviews) as min_Product_reviews_fflite,
max(Sales_Rank_Own_Categ) as max_salesrank_categ_fflite,
mean(Sales_Rank_Own_Categ) as mean_salesrank_categ_fflite,
min(Sales_Rank_Own_Categ) as min_salesrank_categ_fflite,
max(Sales_Rank_Sub_categ) as max_salesrank_subcateg_fflite,
mean(Sales_Rank_Sub_categ) as mean_salesrank_subcateg_fflite,
min(Sales_Rank_Sub_categ) as min_salesrank_subcateg_fflite
from
focal_rival_prods27
WHERE ASIN IN ('1589947207','1616615885' , 'B018RFIVYG', 'B01JA8B18M')
group by
time1;
quit;
PROC SQL;
create table PRODUCT_CHAR_usopoly as
select
time1,
MAX(Answered_Questions) as max_ans_questions_usopoly,
mean(Answered_Questions) as mean_ans_questions_usopoly,
min(Answered_Questions) as min_ans_questions_usopoly,
max(Buy_Box_Price1) as max_buyboxprice_usopoly,
mean(Buy_Box_Price1) as mean_buyboxprice_usopoly,
min(Buy_Box_Price1) as min_buyboxprice_usopoly,
max(nonbuybox3p_price) as max_nonbbox3pprice_uso,
mean(nonbuybox3p_price) as mean_nonbbox3pprice_uso,
min(nonbuybox3p_price) as min_nonbbox3pprice_uso,
max(Product_Star_Rate) as max_product_star_rate_usopoly,
mean(Product_Star_Rate) as mean_product_star_rate_usopoly,
min(Product_Star_Rate) as min_product_star_rate_usopoly,
max(Product_reviews) as max_Product_reviews_usopoly,
mean(Product_reviews) as mean_Product_reviews_usopoly,
min(Product_reviews) as min_Product_reviews_usopoly,
max(Sales_Rank_Own_Categ) as max_salesrank_categ_usopoly,
mean(Sales_Rank_Own_Categ) as mean_salesrank_categ_usopoly,
min(Sales_Rank_Own_Categ) as min_salesrank_categ_usopoly,
max(Sales_Rank_Sub_categ) as max_salesrank_subcateg_usopoly,
mean(Sales_Rank_Sub_categ) as mean_salesrank_subcateg_usopoly,
min(Sales_Rank_Sub_categ) as min_salesrank_subcateg_usopoly
from
focal_rival_prods27
WHERE ASIN IN ( 'B00UB25IJA', 'B00UD6EXIQ', 'B01F00U4VY')
group by
time1;
quit;
/**MERGING BACK TO THE SELLER CHARACTERISTICS DATA***/
DATA seller_char_sorted;
MERGE seller_char_sorted (IN=A) PRODUCT_CHAR_fflite (IN=B) PRODUCT_CHAR_usopoly (in=c) SELLER_offercharcs_base (in=d);
BY TIME1;
IF A ;
IF A AND NOT B THEN DO;
max_ans_questions_fflite =0;
mean_ans_questions_fflite =0;
min_ans_questions_fflite =0;
max_buyboxprice_fflite =0;
mean_buyboxprice_fflite =0;
min_buyboxprice_fflite =0;
max_nonbbox3pprice_fflite =0;
mean_nonbbox3pprice_fflite = 0;
min_nonbbox3pprice_fflite = 0;
max_product_star_rate_fflite =0;
mean_product_star_rate_fflite =0;
min_product_star_rate_fflite =0;
max_Product_reviews_fflite =0;
mean_Product_reviews_fflite =0;
min_Product_reviews_fflite =0;
max_salesrank_categ_fflite =0;
mean_salesrank_categ_fflite =0;
min_salesrank_categ_fflite =0;
max_salesrank_subcateg_fflite =0;
mean_salesrank_subcateg_fflite =0;
min_salesrank_subcateg_fflite =0;
END;
IF A AND NOT C THEN DO;
max_ans_questions_usopoly =0;
mean_ans_questions_usopoly =0;
min_ans_questions_usopoly =0;
max_buyboxprice_usopoly =0;
mean_buyboxprice_usopoly =0;
min_buyboxprice_usopoly =0;
max_nonbbox3pprice_uso = 0;
mean_nonbbox3pprice_uso = 0;
min_nonbbox3pprice_uso = 0;
max_product_star_rate_usopoly =0;
mean_product_star_rate_usopoly =0;
min_product_star_rate_usopoly =0;
max_Product_reviews_usopoly =0;
mean_Product_reviews_usopoly =0;
min_Product_reviews_usopoly =0;
max_salesrank_categ_usopoly =0;
mean_salesrank_categ_usopoly =0;
min_salesrank_categ_usopoly =0;
max_salesrank_subcateg_usopoly =0;
mean_salesrank_subcateg_usopoly =0;
min_salesrank_subcateg_usopoly =0;
END;
RUN;


proc sort data = seller_char_sorted out= ALL_char_sorted ;
by time1;
run;
%macro prodchar(a,b);
data prodchar_&b.;
set focal_rival_prods27;
where ASIN = &a.;
rename Answered_Questions = Answered_Questions&b. ;
rename Buy_Box_Price1 = buyboxprice_&b.;
rename nonbuybox3p_price = nonbuybox3pprice_&b. ;
rename Product_Star_Rate = productstar_&b.;
rename Product_reviews = Product_reviews_&b.;
rename Sales_Rank_Own_Categ = Salerank_&b.;
rename Sales_Rank_Sub_categ = Salerank_subcat_&b.;
run;

proc sort data = prodchar_&b. (keep = time1  Answered_Questions&b. buyboxprice_&b. nonbuybox3pprice_&b. productstar_&b. 
Product_reviews_&b. Salerank_&b. Salerank_subcat_&b.) NODUPKEY out= prodchar_sorted&b.;
by time1;
run;

data ALL_char_sorted ;
merge ALL_char_sorted (in=a) prodchar_sorted&b. (in=b);
by time1;
if a;
IF A AND NOT B THEN DO;
Answered_Questions&b. = 0;
buyboxprice_&b. = 0;
nonbuybox3pprice_&b. = 0;
productstar_&b. = 0;
Product_reviews_&b. = 0;
Salerank_&b. = 0;
Salerank_subcat_&b. = 0;
END;
run;
%mend;
%prodchar(a= 'B00DLX51HK', b = 4dcity);

/**OUTCOME VECTOR*/
/*Amazon New	B00UB25IJA	USAopoly*/
/*Amazon New	B018RFIVYG	Fantasy Flight*/
/*Cluster 1	B00UD6EXIQ	USAopoly*/
/*Cluster 2	B00UD6EXIQ	USAopoly*/
/*Cluster 3	B00UB25IJA	USAopoly*/
/*Cluster 4	B00UB25IJA	USAopoly*/

/***full list of ASINs and brand**/
/*1589947207	Fantasy Flight*/
/*1616615885	Fantasy Flight*/
/*B00DLX51HK	4D Cityscape*/
/*B00UB25IJA	USAopoly*/
/*B00UD6EXIQ	USAopoly*/
/*B018RFIVYG	Fantasy Flight*/
/*B01F00U4VY	USAopoly*/
/*B01JA8B18M	Fantasy Flight*/

/**MERGING WITH BRAND SELLER PRICE CHANGE TREND**/
DATA ALL_COVARIATES ;
MERGE ALL_char_sorted (IN = A) numpricechange_sellerbrandtrend (IN=B) ;
BY TIME1;
IF A ;
RUN;
proc contents data = final_outputvector;
run;
/*****merging with final ouusopolyome vector****/

proc sort data = final_outputvector ;
by time1;
run;

data merge_finaldata;
merge final_outputvector (in=a) ALL_COVARIATES (in=b);
by time1;
if a;
rename pricechange_Amzn_fflite = Y_amzn_fflite;
rename pricechange_Amzn_uso1 = Y_amzn_uso1;
rename pricechange_Amzn_uso2 = Y_amzn_uso2;
rename pricechange_clust1_uso1 = Y_clust1_uso1;
rename pricechange_clust1_fflite = Y_clust1_fflite;
rename pricechange_clust2_uso1 = Y_clust2_uso1;
rename pricechange_clust3_uso2 = Y_clust3_uso2;
rename pricechange_clust4_uso2 = Y_clust4_uso2;
run;
proc print data = merge_finaldata;
var Y_clust1_uso1;
run;
proc contents data = merge_finaldata;
run;
proc sort data = merge_finaldata NODUPREC out=merge_finaldata1 ;
by  TIME1 ;
run;
proc expand data=merge_finaldata1 out=merge_finaldata2 method = none; 
convert	Y_amzn_fflite	=	Y_amzn_fflite_lag1	/transformout=(lag 1);
convert	Y_amzn_fflite	=	Y_amzn_fflite_lag2	/transformout=(lag 2);
convert	Y_amzn_fflite	=	Y_amzn_fflite_lag3	/transformout=(lag 3);
convert	Y_amzn_uso1	=	Y_amzn_uso1_lag1	/transformout=(lag 1);
convert	Y_amzn_uso1	=	Y_amzn_uso1_lag2	/transformout=(lag 2);
convert	Y_amzn_uso1	=	Y_amzn_uso1_lag3	/transformout=(lag 3);
convert	Y_amzn_uso2	=	Y_amzn_uso2_lag1	/transformout=(lag 1);
convert	Y_amzn_uso2	=	Y_amzn_uso2_lag2	/transformout=(lag 2);
convert	Y_amzn_uso2	=	Y_amzn_uso2_lag3	/transformout=(lag 3);
convert	Y_clust1_uso1	=	Y_clust1_uso1_lag1	/transformout=(lag 1);
convert	Y_clust1_uso1	=	Y_clust1_uso1_lag2	/transformout=(lag 2);
convert	Y_clust1_uso1	=	Y_clust1_uso1_lag3	/transformout=(lag 3);
convert	Y_clust1_fflite	=	Y_clust1_fflite_lag1	/transformout=(lag 1);
convert	Y_clust1_fflite	=	Y_clust1_fflite_lag2	/transformout=(lag 2);
convert	Y_clust1_fflite	=	Y_clust1_fflite_lag3	/transformout=(lag 3);
convert	Y_clust2_uso1	=	Y_clust2_uso1_lag1	/transformout=(lag 1);
convert	Y_clust2_uso1	=	Y_clust2_uso1_lag2	/transformout=(lag 2);
convert	Y_clust2_uso1	=	Y_clust2_uso1_lag3	/transformout=(lag 3);
convert	Y_clust3_uso2	=	Y_clust3_uso2_lag1	/transformout=(lag 1);
convert	Y_clust3_uso2	=	Y_clust3_uso2_lag2	/transformout=(lag 2);
convert	Y_clust3_uso2	=	Y_clust3_uso2_lag3	/transformout=(lag 3);
convert	Y_clust4_uso2	=	Y_clust4_uso2_lag1	/transformout=(lag 1);
convert	Y_clust4_uso2	=	Y_clust4_uso2_lag2	/transformout=(lag 2);
convert	Y_clust4_uso2	=	Y_clust4_uso2_lag3	/transformout=(lag 3);
convert	Answered_Questions4dcity	=	Answered_Questions4dcity_lag1	/transformout=(lag 1);
convert	FBA_used3p	=	FBA_used3p_lag1	/transformout=(lag 1);
convert	Freeship_amzn	=	Freeship_amzn_lag1	/transformout=(lag 1);
convert	Freeship_used3p	=	Freeship_used3p_lag1	/transformout=(lag 1);
convert	Freeship_usedamzn	=	Freeship_usedamzn_lag1	/transformout=(lag 1);
convert	NUM_SELLERSamzn	=	numSELLERSamzn_lag1	/transformout=(lag 1);
convert	NUM_SELLERSclust1	=	numSELLERSclust1_lag1	/transformout=(lag 1);
convert	NUM_SELLERSclust2	=	numSELLERSclust2_lag1	/transformout=(lag 1);
convert	NUM_SELLERSclust3	=	numSELLERSclust3_lag1	/transformout=(lag 1);
convert	NUM_SELLERSclust4	=	numSELLERSclust4_lag1	/transformout=(lag 1);
convert	NUM_SELLERSused3p	=	numSELLERSused3p_lag1	/transformout=(lag 1);
convert	NUM_SELLERSusedamzn	=	numSELLERSusedamzn_lag1	/transformout=(lag 1);
convert	NUM_SKUSamzn	=	numSKUSamzn_lag1	/transformout=(lag 1);
convert	NUM_SKUSclust1	=	numSKUSclust1_lag1	/transformout=(lag 1);
convert	NUM_SKUSclust2	=	numSKUSclust2_lag1	/transformout=(lag 1);
convert	NUM_SKUSclust3	=	numSKUSclust3_lag1	/transformout=(lag 1);
convert	NUM_SKUSclust4	=	numSKUSclust4_lag1	/transformout=(lag 1);
convert	NUM_SKUSused3p	=	numSKUSused3p_lag1	/transformout=(lag 1);
convert	NUM_SKUSusedamzn	=	numSKUSusedamzn_lag1	/transformout=(lag 1);
convert	NUM_ffliteamzn	=	numffliteamzn_lag1	/transformout=(lag 1);
convert	NUM_ffliteclust1	=	numffliteclust1_lag1	/transformout=(lag 1);
convert	NUM_ffliteclust2	=	numffliteclust2_lag1	/transformout=(lag 1);
convert	NUM_ffliteclust3	=	numffliteclust3_lag1	/transformout=(lag 1);
convert	NUM_ffliteclust4	=	numffliteclust4_lag1	/transformout=(lag 1);
convert	NUM_ffliteused3p	=	numffliteused3p_lag1	/transformout=(lag 1);
convert	NUM_ffliteusedamzn	=	numffliteusedamzn_lag1	/transformout=(lag 1);
convert	Product_reviews_4dcity	=	Product_reviews_4dcity_lag1	/transformout=(lag 1);
convert	Salerank_4dcity	=	Salerank_4dcity_lag1	/transformout=(lag 1);
convert	Salerank_subcat_4dcity	=	Salerank_subcat_4dcity_lag1	/transformout=(lag 1);
convert	buyboxprice_4dcity	=	buyboxprice_4dcity_lag1	/transformout=(lag 1);
convert	cumsalefreq_amzn	=	cumsalefreq_amzn_lag1	/transformout=(lag 1);
convert	max_Product_reviews_fflite	=	maxProduct_reviews_fflite_lag1	/transformout=(lag 1);
convert	max_Product_reviews_usopoly	=	maxProduct_reviews_usopoly_lag1	/transformout=(lag 1);
convert	max_ans_questions_fflite	=	maxans_questions_fflite_lag1	/transformout=(lag 1);
convert	max_ans_questions_usopoly	=	maxans_questions_usopoly_lag1	/transformout=(lag 1);
convert	max_buyboxprice_fflite	=	maxbuyboxprice_fflite_lag1	/transformout=(lag 1);
convert	max_buyboxprice_usopoly	=	maxbuyboxprice_usopoly_lag1	/transformout=(lag 1);
convert	max_cumsalefreq_clust1	=	maxcumsalefreq_clust1_lag1	/transformout=(lag 1);
convert	max_cumsalefreq_clust2	=	maxcumsalefreq_clust2_lag1	/transformout=(lag 1);
convert	max_cumsalefreq_clust3	=	maxcumsalefreq_clust3_lag1	/transformout=(lag 1);
convert	max_cumsalefreq_clust4	=	maxcumsalefreq_clust4_lag1	/transformout=(lag 1);
convert	max_num_seller_rating_clust1	=	maxnumsellerate_clust1_lag1	/transformout=(lag 1);
convert	max_num_seller_rating_clust2	=	maxnumsellerate_clust2_lag1	/transformout=(lag 1);
convert	max_num_seller_rating_clust3	=	maxnumsellerate_clust3_lag1	/transformout=(lag 1);
convert	max_num_seller_rating_clust4	=	maxnumsellerate_clust4_lag1	/transformout=(lag 1);
convert	max_perct_sellerrating_clust1	=	maxperct_sellerate_clust1_lag1	/transformout=(lag 1);
convert	max_perct_sellerrating_clust2	=	maxperct_sellerate_clust2_lag1	/transformout=(lag 1);
convert	max_perct_sellerrating_clust3	=	maxperct_sellerate_clust3_lag1	/transformout=(lag 1);
convert	max_perct_sellerrating_clust4	=	maxperct_sellerate_clust4_lag1	/transformout=(lag 1);
convert	max_product_star_rate_fflite	=	maxproduct_star_fflite_lag1	/transformout=(lag 1);
convert	max_product_star_rate_usopoly	=	maxproduct_star_usopoly_lag1	/transformout=(lag 1);
convert	max_salesrank_categ_fflite	=	maxsalesrank_fflite_lag1	/transformout=(lag 1);
convert	max_salesrank_categ_usopoly	=	maxsalesrank_usopoly_lag1	/transformout=(lag 1);
convert	max_salesrank_subcateg_fflite	=	maxsalesranksubcatfflite_lag1	/transformout=(lag 1);
convert	max_salesrank_subcateg_usopoly	=	maxsalesranksubcatusopoly_lag1	/transformout=(lag 1);
convert	max_seller_star_rating_clust1	=	maxseller_star_clust1_lag1	/transformout=(lag 1);
convert	max_seller_star_rating_clust2	=	maxseller_star_clust2_lag1	/transformout=(lag 1);
convert	max_seller_star_rating_clust3	=	maxseller_star_clust3_lag1	/transformout=(lag 1);
convert	max_seller_star_rating_clust4	=	maxseller_star_clust4_lag1	/transformout=(lag 1);
convert	max_uniq_ASIN_clust1	=	maxuniq_ASIN_clust1_lag1	/transformout=(lag 1);
convert	max_uniq_ASIN_clust2	=	maxuniq_ASIN_clust2_lag1	/transformout=(lag 1);
convert	max_uniq_ASIN_clust3	=	maxuniq_ASIN_clust3_lag1	/transformout=(lag 1);
convert	max_uniq_ASIN_clust4	=	maxuniq_ASIN_clust4_lag1	/transformout=(lag 1);
convert	mean_Product_reviews_fflite	=	meanProduct_reviews_fflite_lag1	/transformout=(lag 1);
convert	mean_Product_reviews_usopoly	=	meanProduct_reviews_usopoly_lag1	/transformout=(lag 1);
convert	mean_ans_questions_fflite	=	meanans_questions_fflite_lag1	/transformout=(lag 1);
convert	mean_ans_questions_usopoly	=	meanans_questions_usopoly_lag1	/transformout=(lag 1);
convert	mean_buyboxprice_fflite	=	meanbuyboxprice_fflite_lag1	/transformout=(lag 1);
convert	mean_buyboxprice_usopoly	=	meanbuyboxprice_usopoly_lag1	/transformout=(lag 1);
convert	mean_nonbbox3pprice_fflite	=	meannonbbox3pprice_fflite_lag1	/transformout=(lag 1);
convert	mean_nonbbox3pprice_uso	=	meannonbbox3pprice_uso_lag1	/transformout=(lag 1);
convert	nonbuybox3pprice_4dcity	=	nonbbox3pprice_4dcity_lag1	/transformout=(lag 1);
convert	mean_cumsalefreq_clust1	=	meancumsalefreq_clust1_lag1	/transformout=(lag 1);
convert	mean_cumsalefreq_clust2	=	meancumsalefreq_clust2_lag1	/transformout=(lag 1);
convert	mean_cumsalefreq_clust3	=	meancumsalefreq_clust3_lag1	/transformout=(lag 1);
convert	mean_cumsalefreq_clust4	=	meancumsalefreq_clust4_lag1	/transformout=(lag 1);
convert	mean_num_seller_rating_clust1	=	meannumsellerate_clust1_lag1	/transformout=(lag 1);
convert	mean_num_seller_rating_clust2	=	meannumsellerate_clust2_lag1	/transformout=(lag 1);
convert	mean_num_seller_rating_clust3	=	meannumsellerate_clust3_lag1	/transformout=(lag 1);
convert	mean_num_seller_rating_clust4	=	meannumsellerate_clust4_lag1	/transformout=(lag 1);
convert	mean_perct_sellerrating_clust1	=	meanperct_sellerate_clust1_lag1	/transformout=(lag 1);
convert	mean_perct_sellerrating_clust2	=	meanperct_sellerate_clust2_lag1	/transformout=(lag 1);
convert	mean_perct_sellerrating_clust3	=	meanperct_sellerate_clust3_lag1	/transformout=(lag 1);
convert	mean_perct_sellerrating_clust4	=	meanperct_sellerate_clust4_lag1	/transformout=(lag 1);
convert	mean_product_star_rate_fflite	=	meanproduct_star_fflite_lag1	/transformout=(lag 1);
convert	mean_product_star_rate_usopoly	=	meanproduct_star_usopoly_lag1	/transformout=(lag 1);
convert	mean_salesrank_categ_fflite	=	meansalesrank_fflite_lag1	/transformout=(lag 1);
convert	mean_salesrank_categ_usopoly	=	meansalesrank_usopoly_lag1	/transformout=(lag 1);
convert	mean_salesrank_subcateg_fflite	=	meansalesranksubcatfflite_lag1	/transformout=(lag 1);
convert	mean_salesrank_subcateg_usopoly	=	meansalesranksubcatusopoly_lag1	/transformout=(lag 1);
convert	mean_seller_star_rating_clust1	=	meanseller_star_clust1_lag1	/transformout=(lag 1);
convert	mean_seller_star_rating_clust2	=	meanseller_star_clust2_lag1	/transformout=(lag 1);
convert	mean_seller_star_rating_clust3	=	meanseller_star_clust3_lag1	/transformout=(lag 1);
convert	mean_seller_star_rating_clust4	=	meanseller_star_clust4_lag1	/transformout=(lag 1);
convert	mean_uniq_ASIN_clust1	=	meanuniq_ASIN_clust1_lag1	/transformout=(lag 1);
convert	mean_uniq_ASIN_clust2	=	meanuniq_ASIN_clust2_lag1	/transformout=(lag 1);
convert	mean_uniq_ASIN_clust3	=	meanuniq_ASIN_clust3_lag1	/transformout=(lag 1);
convert	mean_uniq_ASIN_clust4	=	meanuniq_ASIN_clust4_lag1	/transformout=(lag 1);
convert	min_Product_reviews_fflite	=	minProduct_reviews_fflite_lag1	/transformout=(lag 1);
convert	min_Product_reviews_usopoly	=	minProduct_reviews_usopoly_lag1	/transformout=(lag 1);
convert	min_ans_questions_fflite	=	minans_questions_fflite_lag1	/transformout=(lag 1);
convert	min_ans_questions_usopoly	=	minans_questions_usopoly_lag1	/transformout=(lag 1);
convert	min_buyboxprice_fflite	=	minbuyboxprice_fflite_lag1	/transformout=(lag 1);
convert	min_buyboxprice_usopoly	=	minbuyboxprice_usopoly_lag1	/transformout=(lag 1);
convert	min_cumsalefreq_clust1	=	mincumsalefreq_clust1_lag1	/transformout=(lag 1);
convert	min_cumsalefreq_clust2	=	mincumsalefreq_clust2_lag1	/transformout=(lag 1);
convert	min_cumsalefreq_clust3	=	mincumsalefreq_clust3_lag1	/transformout=(lag 1);
convert	min_cumsalefreq_clust4	=	mincumsalefreq_clust4_lag1	/transformout=(lag 1);
convert	min_num_seller_rating_clust1	=	minnumsellerate_clust1_lag1	/transformout=(lag 1);
convert	min_num_seller_rating_clust2	=	minnumsellerate_clust2_lag1	/transformout=(lag 1);
convert	min_num_seller_rating_clust3	=	minnumsellerate_clust3_lag1	/transformout=(lag 1);
convert	min_num_seller_rating_clust4	=	minnumsellerate_clust4_lag1	/transformout=(lag 1);
convert	min_perct_sellerrating_clust1	=	minperct_sellerate_clust1_lag1	/transformout=(lag 1);
convert	min_perct_sellerrating_clust2	=	minperct_sellerate_clust2_lag1	/transformout=(lag 1);
convert	min_perct_sellerrating_clust3	=	minperct_sellerate_clust3_lag1	/transformout=(lag 1);
convert	min_perct_sellerrating_clust4	=	minperct_sellerate_clust4_lag1	/transformout=(lag 1);
convert	min_product_star_rate_fflite	=	minproduct_star_fflite_lag1	/transformout=(lag 1);
convert	min_product_star_rate_usopoly	=	minproduct_star_usopoly_lag1	/transformout=(lag 1);
convert	min_salesrank_categ_fflite	=	minsalesrank_fflite_lag1	/transformout=(lag 1);
convert	min_salesrank_categ_usopoly	=	minsalesrank_usopoly_lag1	/transformout=(lag 1);
convert	min_salesrank_subcateg_fflite	=	minsalesranksubcatfflite_lag1	/transformout=(lag 1);
convert	min_salesrank_subcateg_usopoly	=	minsalesranksubcatusopoly_lag1	/transformout=(lag 1);
convert	min_seller_star_rating_clust1	=	minseller_star_clust1_lag1	/transformout=(lag 1);
convert	min_seller_star_rating_clust2	=	minseller_star_clust2_lag1	/transformout=(lag 1);
convert	min_seller_star_rating_clust3	=	minseller_star_clust3_lag1	/transformout=(lag 1);
convert	min_seller_star_rating_clust4	=	minseller_star_clust4_lag1	/transformout=(lag 1);
convert	min_uniq_ASIN_clust1	=	minuniq_ASIN_clust1_lag1	/transformout=(lag 1);
convert	min_uniq_ASIN_clust2	=	minuniq_ASIN_clust2_lag1	/transformout=(lag 1);
convert	min_uniq_ASIN_clust3	=	minuniq_ASIN_clust3_lag1	/transformout=(lag 1);
convert	min_uniq_ASIN_clust4	=	minuniq_ASIN_clust4_lag1	/transformout=(lag 1);
convert	num_4dcityamzn	=	num4dcityamzn_lag1	/transformout=(lag 1);
convert	num_4dcityclust1	=	num4dcityclust1_lag1	/transformout=(lag 1);
convert	num_4dcityclust2	=	num4dcityclust2_lag1	/transformout=(lag 1);
convert	num_4dcityclust3	=	num4dcityclust3_lag1	/transformout=(lag 1);
convert	num_4dcityclust4	=	num4dcityclust4_lag1	/transformout=(lag 1);
convert	num_4dcityused3p	=	num4dcityused3p_lag1	/transformout=(lag 1);
convert	num_4dcityusedamzn	=	num4dcityusedamzn_lag1	/transformout=(lag 1);
convert	num_FBA_clust1	=	numFBA_clust1_lag1	/transformout=(lag 1);
convert	num_FBA_clust2	=	numFBA_clust2_lag1	/transformout=(lag 1);
convert	num_FBA_clust3	=	numFBA_clust3_lag1	/transformout=(lag 1);
convert	num_FBA_clust4	=	numFBA_clust4_lag1	/transformout=(lag 1);
convert	num_Freeship_clust1	=	numFreeship_clust1_lag1	/transformout=(lag 1);
convert	num_Freeship_clust2	=	numFreeship_clust2_lag1	/transformout=(lag 1);
convert	num_Freeship_clust3	=	numFreeship_clust3_lag1	/transformout=(lag 1);
convert	num_Freeship_clust4	=	numFreeship_clust4_lag1	/transformout=(lag 1);
convert	num_pricechange14dcityamzn	=	numprchng4dcityamzn_lag1	/transformout=(lag 1);
convert	num_pricechange14dcityclust1	=	numprchng4dcityclust1_lag1	/transformout=(lag 1);
convert	num_pricechange14dcityclust2	=	numprchng4dcityclust2_lag1	/transformout=(lag 1);
convert	num_pricechange14dcityclust3	=	numprchng4dcityclust3_lag1	/transformout=(lag 1);
convert	num_pricechange14dcityclust4	=	numprchng4dcityclust4_lag1	/transformout=(lag 1);
convert	num_pricechange14dcityused3p	=	numprchng4dcityused3p_lag1	/transformout=(lag 1);
convert	num_pricechange14dcityusedamzn	=	numprchng4dcityusedamzn_lag1	/transformout=(lag 1);
convert	num_pricechange1ffliteamzn	=	numprchngffliteamzn_lag1	/transformout=(lag 1);
convert	num_pricechange1ffliteclust1	=	numprchngffliteclust1_lag1	/transformout=(lag 1);
convert	num_pricechange1ffliteclust2	=	numprchngffliteclust2_lag1	/transformout=(lag 1);
convert	num_pricechange1ffliteclust3	=	numprchngffliteclust3_lag1	/transformout=(lag 1);
convert	num_pricechange1ffliteclust4	=	numprchngffliteclust4_lag1	/transformout=(lag 1);
convert	num_pricechange1ffliteused3p	=	numprchngffliteused3p_lag1	/transformout=(lag 1);
convert	num_pricechange1ffliteusedamzn	=	numprchngffliteusedamzn_lag1	/transformout=(lag 1);
convert	num_pricechange1usopolyamzn	=	numprchngusopolyamzn_lag1	/transformout=(lag 1);
convert	num_pricechange1usopolyclust1	=	numprchngusopolyclust1_lag1	/transformout=(lag 1);
convert	num_pricechange1usopolyclust2	=	numprchngusopolyclust2_lag1	/transformout=(lag 1);
convert	num_pricechange1usopolyclust3	=	numprchngusopolyclust3_lag1	/transformout=(lag 1);
convert	num_pricechange1usopolyclust4	=	numprchngusopolyclust4_lag1	/transformout=(lag 1);
convert	num_pricechange1usopolyused3p	=	numprchngusopolyused3p_lag1	/transformout=(lag 1);
convert	num_pricechange1usopolyusedamzn	=	numprchngusopolyusedamzn_lag1	/transformout=(lag 1);
convert	num_prime_clust1	=	numprime_clust1_lag1	/transformout=(lag 1);
convert	num_prime_clust2	=	numprime_clust2_lag1	/transformout=(lag 1);
convert	num_prime_clust3	=	numprime_clust3_lag1	/transformout=(lag 1);
convert	num_prime_clust4	=	numprime_clust4_lag1	/transformout=(lag 1);
convert	num_usopolyamzn	=	numusopolyamzn_lag1	/transformout=(lag 1);
convert	num_usopolyclust1	=	numusopolyclust1_lag1	/transformout=(lag 1);
convert	num_usopolyclust2	=	numusopolyclust2_lag1	/transformout=(lag 1);
convert	num_usopolyclust3	=	numusopolyclust3_lag1	/transformout=(lag 1);
convert	num_usopolyclust4	=	numusopolyclust4_lag1	/transformout=(lag 1);
convert	num_usopolyused3p	=	numusopolyused3p_lag1	/transformout=(lag 1);
convert	num_usopolyusedamzn	=	numusopolyusedamzn_lag1	/transformout=(lag 1);
convert priceAmzn_fflite = priceAmzn_fflite_lag /transformout=(lag 1);
convert priceAmzn_uso1 = priceAmzn_uso1_lag /transformout=(lag 1);
convert priceAmzn_uso2 = priceAmzn_uso2_lag /transformout=(lag 1);
convert priceclust1_fflite = priceclust1_fflite_lag /transformout=(lag 1);
convert priceclust1_uso1 = priceclust1_uso1_lag /transformout=(lag 1);
convert priceclust2_uso1 = priceclust2_uso1_lag /transformout=(lag 1);
convert priceclust3_uso2 = priceclust3_uso2_lag /transformout=(lag 1);
convert priceclust4_uso2 = priceclust4_uso2_lag /transformout=(lag 1);
convert pricediff_lowst_Amzn_fflite =  pricedifflowstAmzn_fflite_lag /transformout=(lag 1);
convert pricediff_lowst_Amzn_uso1 =    pricedifflowstAmzn_uso1_lag /transformout=(lag 1);
convert pricediff_lowst_Amzn_uso2 =    pricedifflowstAmzn_uso2_lag /transformout=(lag 1);
convert pricediff_lowst_clust1_fflite =  pricedifflowstclust1_fflite_lag /transformout=(lag 1);
convert pricediff_lowst_clust1_uso1 =    pricedifflowstclust1_uso1_lag /transformout=(lag 1);
convert pricediff_lowst_clust2_uso1 =    pricedifflowstclust2_uso1_lag /transformout=(lag 1);
convert pricediff_lowst_clust3_uso2 =    pricedifflowstclust3_uso2_lag /transformout=(lag 1);
convert pricediff_lowst_clust4_uso2 =    pricedifflowstclust4_uso2_lag /transformout=(lag 1);
convert	prime_amzn	=	prime_amzn_lag1	/transformout=(lag 1);
convert	prime_used3p	=	prime_used3p_lag1	/transformout=(lag 1);
convert	productstar_4dcity	=	productstar_4dcity_lag1	/transformout=(lag 1);
run;

/**merging back seasonality indicator to the modeling data ***/

proc sort data = amazon_kitchnequip3 (keep = time1 season) nodupkey out= seasonality_data;
by time1;
run;
data mvrf_model1;
merge merge_finaldata2 (in=a) seasonality_data (in=b);
by time1 ;
if a and b;
run;

/***ADDING External site data*****/

/**luggage SKUs scraped from Walmart and Sears only and not HD***/

PROC import 
DATAfile ="D:\Amazon Price Dynamics\New Data\Data Scraped\Walmart_merged\walmart_Pricechangesallcateg_4-6-20.csv" 
DBMS=csv
OUT= wm_pricechange_allcateg
REPLACE;
run;

PROC import 
DATAfile ="D:\Amazon Price Dynamics\New Data\Data Scraped\Sears_merged\Sears_Pricechangesallcateg_4-6-20.csv" 
DBMS=csv
OUT= sears_pricechange_allcateg
REPLACE;
run;
data sears_pricechange_allcateg;
set sears_pricechange_allcateg;
rename date = time1;
run;

proc sql;
create table WM as
select 
time1,
categ,
max(pricechange) as max_pricechange_wm,
mean(pricechange) as mean_pricechange_wm,
min(pricechange) as min_pricechange_wm
from
wm_pricechange_allcateg
where time1 >= '14Dec2017'd and time1 <= '06May2018'd 
group by
time1,
categ;
quit;
data wm1;
set wm;
if abs(min_pricechange_wm) > abs(max_pricechange_wm) then maxpricechange_wm = min_pricechange_wm;
else if abs(min_pricechange_wm) < abs(max_pricechange_wm) then maxpricechange_wm = max_pricechange_wm;
else maxpricechange_wm = 0;
run;
proc freq data = wm1;
tables time1;run;

proc sql;
create table Sears as
select 
time1,
categ,
max(pricechange) as max_pricechange_sears,
mean(pricechange) as mean_pricechange_sears,
min(pricechange) as min_pricechange_sears
from
sears_pricechange_allcateg
where time1 >= '14Dec2017'd and time1 <= '06May2018'd 
group by
time1,
categ;
quit;
data sears1;
set sears;
if abs(min_pricechange_sears) > abs(max_pricechange_sears) then maxpricechange_sears = min_pricechange_sears;
else if abs(min_pricechange_sears) < abs(max_pricechange_sears) then maxpricechange_sears = max_pricechange_sears;
else maxpricechange_sears = 0;
run;

/*data merged_allwebsites (drop=  min_pricechange_wm max_pricechange_wm*/
/*min_pricechange_sears max_pricechange_sears);*/
/*merge WM1 (in=b) Sears1 (in=c);*/
/*by time1 categ;*/
/*if  b and c;*/
/*if abs(maxpricechange_WM) = max(abs(maxpricechange_WM),abs(maxpricechange_sears)) then maxpricechange_externalsite = maxpricechange_WM;*/
/*else if abs(maxpricechange_sears) = max(abs(maxpricechange_WM),abs(maxpricechange_sears)) then maxpricechange_externalsite = maxpricechange_sears;*/
/*else maxpricechange_externalsite = 0;*/
/*meanpricechange_externalsite = mean(mean_pricechange_WM,mean_pricechange_sears);*/
/*run;*/

data merged_allwebsites (drop= min_pricechange_wm max_pricechange_wm
min_pricechange_sears max_pricechange_sears);
merge WM1 (in=b) Sears1 (in=c);
by time1 categ;
if b and c;
run;

/**export this above file for all other categories**/
/**extracting only Board Games categ***/

proc freq data = merged_allwebsites;
tables categ;
run;

data external_boardgamesprices;
set merged_allwebsites;
where categ = 'Board Games';
run;

/**/
/*proc sort data = external_boardgamesprices (drop = maxpricechange_WM mean_pricechange_WM */
/*maxpricechange_sears mean_pricechange_sears categ)*/
/*out=externalsite_data2;*/
/*by time1;*/
/*run;*/
/**/
/*proc expand data=externalsite_data2 out=externalsite_data3 method = none; */
/*convert	maxpricechange_externalsite	=	maxpricechange_extsite_lag1	/transformout=(lag 1);*/
/*convert	meanpricechange_externalsite	=	meanpricechange_extsite_lag1	/transformout=(lag 1);*/
/*run;*/
/*data externalsite_data4 (drop= maxpricechange_externalsite meanpricechange_externalsite);*/
/*set externalsite_data3;*/
/*run;*/
/**/
/*proc sort data = mvrf_model1;*/
/*by time1;*/
/*run;*/
/**/
/*data mvrfdata_withexternalsite;*/
/*merge mvrf_model1 (in=a) externalsite_data4 (in=b);*/
/*by time1;*/
/*if a;*/
/*if a and not b then do;*/
/*externalsite_ind = 0;*/
/*maxpricechange_extsite_lag1 = 0;*/
/*meanpricechange_extsite_lag1 = 0;*/
/*end;*/
/*else externalsite_ind = 1;*/
/*run;*/
/*proc contents data = mvrfdata_withexternalsite;*/
/*run;*/
/**/
/*PROC EXPORT */
/*DATA=mvrfdata_withexternalsite*/
/*DBMS=csv*/
/*OUTFILE= "C:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\mvrfdata_withexternalsite_boardgames_May2020.csv"*/
/*REPLACE;*/
/*run;*/

/***merging with edited external site data****/
proc sort data = mvrf_model1;
by time1;
run;

proc sort data = external_boardgamesprices (drop = categ) out = externalsite_data2;
by time1;
run;
data mvrfdata_withexternalsite1;
merge mvrf_model1 (in=a) externalsite_data2 (in=b);
by time1;
if a;

if a and not b then do;
ext_site = 0;
maxpricechange_sears = 0;     
maxpricechange_wm = 0;     
mean_pricechange_sears = 0;     
mean_pricechange_wm = 0;
end;
else ext_site =1;
run;
/***lagging external site price changes****/ 
data mvrfdata_withexternalsite2;
set mvrfdata_withexternalsite1;
maxpricechange_searslag1 = lag(maxpricechange_sears);   
maxpricechange_wmlag1 = lag(maxpricechange_wm);
maxpricechange_searslag2 = lag2(maxpricechange_sears);   
maxpricechange_wmlag2 = lag2(maxpricechange_wm);
maxpricechange_searslag3 = lag3(maxpricechange_sears);   
maxpricechange_wmlag3 = lag3(maxpricechange_wm);
run;

/***DAY OF THE WEEK IDENTIFICATION**/

data mvrfdata_withexternalsite3;
set mvrfdata_withexternalsite2;
day_week = weekday(time1);
if day_week = 1 then sunday = 1; else sunday = 0;
if day_week = 2 then monday = 1; else monday = 0;
if day_week = 3 then tuesday = 1; else tuesday = 0;
if day_week = 4 then wednesday = 1; else wednesday = 0;
if day_week = 5 then thursday = 1; else thursday = 0;
if day_week = 6 then friday = 1; else friday = 0;
if day_week = 7 then saturday = 1; else saturday = 0;
if day_week in (1,7) then weekend = 1; else weekend = 0;
run;
/**/
/*PROC UNIVARIATE DATA = mvrfdata_withexternalsite1 ;*/
/*VAR */
/*Salerank_4dcity_lag1*/
/*maxsalesrank_fflite_lag1*/
/*maxsalesrank_usopoly_lag1*/
/*meansalesrank_fflite_lag1*/
/*meansalesrank_usopoly_lag1*/
/*minsalesrank_fflite_lag1*/
/*minsalesrank_usopoly_lag1*/
/*;*/
/*RUN;*/

data mvrfdata_withexternalsite4;
set mvrfdata_withexternalsite3;
top_salesrank = min(ifn(Salerank_4dcity_lag1=0,.,Salerank_4dcity_lag1),
                    ifn(maxsalesrank_fflite_lag1=0,.,maxsalesrank_fflite_lag1),
                    ifn(maxsalesrank_usopoly_lag1=0,.,maxsalesrank_usopoly_lag1), 
                    ifn(meansalesrank_fflite_lag1=0,.,meansalesrank_fflite_lag1),
                    ifn(meansalesrank_usopoly_lag1=0,.,meansalesrank_usopoly_lag1),
                    ifn(minsalesrank_fflite_lag1=0,.,minsalesrank_fflite_lag1),
					ifn(minsalesrank_usopoly_lag1=0,.,minsalesrank_usopoly_lag1));

bottom_salesrank = max(ifn(Salerank_4dcity_lag1=0,.,Salerank_4dcity_lag1),
                    ifn(maxsalesrank_fflite_lag1=0,.,maxsalesrank_fflite_lag1),
                    ifn(maxsalesrank_usopoly_lag1=0,.,maxsalesrank_usopoly_lag1), 
                    ifn(meansalesrank_fflite_lag1=0,.,meansalesrank_fflite_lag1),
                    ifn(meansalesrank_usopoly_lag1=0,.,meansalesrank_usopoly_lag1),
                    ifn(minsalesrank_fflite_lag1=0,.,minsalesrank_fflite_lag1),
					ifn(minsalesrank_usopoly_lag1=0,.,minsalesrank_usopoly_lag1));
run;

data mvrfdata_withexternalsite5;
set mvrfdata_withexternalsite4;

if top_salesrank ne . and top_salesrank ne bottom_salesrank then do;
if Salerank_4dcity_lag1 = top_salesrank then fourdcity_topbrandlag1 = 1; else fourdcity_topbrandlag1 = 0; 
if minsalesrank_fflite_lag1 = top_salesrank then fflite_topbrandlag1 = 1; else fflite_topbrandlag1 = 0; 
if minsalesrank_usopoly_lag1 = top_salesrank then usopoly_topbrandlag1 = 1; else usopoly_topbrandlag1 = 0; 
end;

if bottom_salesrank ne . and top_salesrank ne bottom_salesrank then do;
if Salerank_4dcity_lag1 = bottom_salesrank then fourdcity_bottombrandlag1 = 1; else fourdcity_bottombrandlag1 = 0; 
if minsalesrank_fflite_lag1 = bottom_salesrank then fflite_bottombrandlag1 = 1; else fflite_bottombrandlag1 = 0; 
if minsalesrank_usopoly_lag1 = bottom_salesrank then usopoly_bottombrandlag1 = 1; else usopoly_bottombrandlag1 = 0; 
end;

else do;
fourdcity_topbrandlag1 = 0;
fflite_topbrandlag1 = 0; 
usopoly_topbrandlag1 = 0; 
fourdcity_bottombrandlag1 = 0; 
fflite_bottombrandlag1 = 0;
usopoly_bottombrandlag1 = 0;
end;

run;
proc contents data = mvrfdata_withexternalsite5;
run;


/***creating main effect of price change**/
data final_gam_modeldata;
set mvrfdata_withexternalsite5;

/*price increase/decrease of 5% and -5%, with direction**/
/**Fantasy Flight**/
if Y_amzn_fflite_lag1 >= 0.05 and Y_amzn_fflite_lag1 < 0.1 then incr5per_amzn_fflite = 1; else incr5per_amzn_fflite = 0;
if Y_amzn_fflite_lag1 <= -0.05 and Y_amzn_fflite_lag1 > -0.1 then decr5per_amzn_fflite = 1; else decr5per_amzn_fflite = 0;

if Y_amzn_fflite_lag1 >= 0.1 and Y_amzn_fflite_lag1 < 0.2 then incr10per_amzn_fflite = 1; else incr10per_amzn_fflite = 0;
if Y_amzn_fflite_lag1 <= -0.1 and Y_amzn_fflite_lag1 > -0.2 then decr10per_amzn_fflite = 1; else decr10per_amzn_fflite = 0;

if Y_amzn_fflite_lag1 >= 0.2 then incr20per_amzn_fflite = 1; else incr20per_amzn_fflite = 0;
if Y_amzn_fflite_lag1 <= -0.2 then decr20per_amzn_fflite = 1; else decr20per_amzn_fflite = 0;

/**USAopoly**/
if Y_amzn_uso1_lag1 >= 0.05 and Y_amzn_uso1_lag1 < 0.1 then incr5per_amzn_uso1 = 1; else incr5per_amzn_uso1 = 0;
if Y_amzn_uso1_lag1 <= -0.05 and Y_amzn_uso1_lag1 > -0.1 then decr5per_amzn_uso1 = 1; else decr5per_amzn_uso1 = 0;

if Y_amzn_uso1_lag1 >= 0.1  and Y_amzn_uso1_lag1 < 0.2 then incr10per_amzn_uso1 = 1; else incr10per_amzn_uso1 = 0;
if Y_amzn_uso1_lag1 <= -0.1 and Y_amzn_uso1_lag1 > -0.2 then decr10per_amzn_uso1 = 1; else decr10per_amzn_uso1 = 0;

if Y_amzn_uso1_lag1 >= 0.2 then incr20per_amzn_uso1 = 1; else incr20per_amzn_uso1 = 0;
if Y_amzn_uso1_lag1 <= -0.2 then decr20per_amzn_uso1 = 1; else decr20per_amzn_uso1 = 0;

if Y_amzn_uso2_lag1 >= 0.05 and Y_amzn_uso2_lag1 < 0.1 then incr5per_amzn_uso2 = 1; else incr5per_amzn_uso2 = 0;
if Y_amzn_uso2_lag1 <= -0.05 and Y_amzn_uso2_lag1 > -0.1 then decr5per_amzn_uso2 = 1; else decr5per_amzn_uso2 = 0;

if Y_amzn_uso2_lag1 >= 0.1  and Y_amzn_uso2_lag1 < 0.2 then incr10per_amzn_uso2 = 1; else incr10per_amzn_uso2 = 0;
if Y_amzn_uso2_lag1 <= -0.1 and Y_amzn_uso2_lag1 > -0.2 then decr10per_amzn_uso2 = 1; else decr10per_amzn_uso2 = 0;

if Y_amzn_uso2_lag1 >= 0.2 then incr20per_amzn_uso2 = 1; else incr20per_amzn_uso2 = 0;
if Y_amzn_uso2_lag1 <= -0.2 then decr20per_amzn_uso2 = 1; else decr20per_amzn_uso2 = 0;

if Y_clust1_fflite_lag1 >= 0.05 and Y_clust1_fflite_lag1 < 0.1 then incr5per_clust1_fflite = 1; else incr5per_clust1_fflite = 0;
if Y_clust1_fflite_lag1 <= -0.05 and Y_clust1_fflite_lag1 > -0.1 then decr5per_clust1_fflite = 1; else decr5per_clust1_fflite = 0;
if Y_clust1_fflite_lag1 >= 0.1 and Y_clust1_fflite_lag1 < 0.2 then incr10per_clust1_fflite = 1; else incr10per_clust1_fflite = 0;
if Y_clust1_fflite_lag1 <= -0.1 and Y_clust1_fflite_lag1 > -0.2 then decr10per_clust1_fflite = 1; else decr10per_clust1_fflite = 0;
if Y_clust1_fflite_lag1 >= 0.2 then incr20per_clust1_fflite = 1; else incr20per_clust1_fflite = 0;
if Y_clust1_fflite_lag1 <= -0.2 then decr20per_clust1_fflite = 1; else decr20per_clust1_fflite = 0;

if Y_clust1_uso1_lag1 >= 0.05 and Y_clust1_uso1_lag1 < 0.1 then incr5per_clust1_uso1 = 1; else incr5per_clust1_uso1 = 0;
if Y_clust2_uso1_lag1 >= 0.05 and Y_clust2_uso1_lag1 < 0.1 then incr5per_clust2_uso1 = 1; else incr5per_clust2_uso1 = 0;
if Y_clust3_uso2_lag1 >= 0.05 and Y_clust3_uso2_lag1 < 0.1 then incr5per_clust3_uso2 = 1; else incr5per_clust3_uso2 = 0;
if Y_clust4_uso2_lag1 >= 0.05 and Y_clust4_uso2_lag1 < 0.1 then incr5per_clust4_uso2 = 1; else incr5per_clust4_uso2 = 0;

if Y_clust1_uso1_lag1 <= -0.05 and Y_clust1_uso1_lag1 > -0.1 then decr5per_clust1_uso1 = 1; else decr5per_clust1_uso1 = 0;
if Y_clust2_uso1_lag1 <= -0.05 and Y_clust2_uso1_lag1 > -0.1 then decr5per_clust2_uso1 = 1; else decr5per_clust2_uso1 = 0;
if Y_clust3_uso2_lag1 <= -0.05 and Y_clust3_uso2_lag1 > -0.1 then decr5per_clust3_uso2 = 1; else decr5per_clust3_uso2 = 0;
if Y_clust4_uso2_lag1 <= -0.05 and Y_clust4_uso2_lag1 > -0.1 then decr5per_clust4_uso2 = 1; else decr5per_clust4_uso2 = 0;

if Y_clust1_uso1_lag1 >= 0.1 and Y_clust1_uso1_lag1 < 0.2 then incr10per_clust1_uso1 = 1; else incr10per_clust1_uso1 = 0;
if Y_clust2_uso1_lag1 >= 0.1 and Y_clust2_uso1_lag1 < 0.2 then incr10per_clust2_uso1 = 1; else incr10per_clust2_uso1 = 0;
if Y_clust3_uso2_lag1 >= 0.1 and Y_clust3_uso2_lag1 < 0.2 then incr10per_clust3_uso2 = 1; else incr10per_clust3_uso2 = 0;
if Y_clust4_uso2_lag1 >= 0.1 and Y_clust4_uso2_lag1 < 0.2 then incr10per_clust4_uso2 = 1; else incr10per_clust4_uso2 = 0;

if Y_clust1_uso1_lag1 <= -0.1 and Y_clust1_uso1_lag1 > -0.2 then decr10per_clust1_uso1 = 1; else decr10per_clust1_uso1 = 0;
if Y_clust2_uso1_lag1 <= -0.1 and Y_clust2_uso1_lag1 > -0.2 then decr10per_clust2_uso1 = 1; else decr10per_clust2_uso1 = 0;
if Y_clust3_uso2_lag1 <= -0.1 and Y_clust3_uso2_lag1 > -0.2 then decr10per_clust3_uso2 = 1; else decr10per_clust3_uso2 = 0;
if Y_clust4_uso2_lag1 <= -0.1 and Y_clust4_uso2_lag1 > -0.2 then decr10per_clust4_uso2 = 1; else decr10per_clust4_uso2 = 0;

if Y_clust1_uso1_lag1 >= 0.2 then incr20per_clust1_uso1 = 1; else incr20per_clust1_uso1 = 0;
if Y_clust2_uso1_lag1 >= 0.2 then incr20per_clust2_uso1 = 1; else incr20per_clust2_uso1 = 0;
if Y_clust3_uso2_lag1 >= 0.2 then incr20per_clust3_uso2 = 1; else incr20per_clust3_uso2 = 0;
if Y_clust4_uso2_lag1 >= 0.2 then incr20per_clust4_uso2 = 1; else incr20per_clust4_uso2 = 0;

if Y_clust1_uso1_lag1 <= -0.2 then decr20per_clust1_uso1 = 1; else decr20per_clust1_uso1 = 0;
if Y_clust2_uso1_lag1 <= -0.2 then decr20per_clust2_uso1 = 1; else decr20per_clust2_uso1 = 0;
if Y_clust3_uso2_lag1 <= -0.2 then decr20per_clust3_uso2 = 1; else decr20per_clust3_uso2 = 0;
if Y_clust4_uso2_lag1 <= -0.2 then decr20per_clust4_uso2 = 1; else decr20per_clust4_uso2 = 0;

run;

PROC EXPORT 
DATA=final_gam_modeldata
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Board Games\final_gam_modeldata_JMRnd4_July2025.csv"
REPLACE;
run;
libname inn 'D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Board Games';
data inn.final_gam_modeldata;
set final_gam_modeldata;
run;

proc contents data = final_gam_modeldata;
run;
/**/
/*proc sql;*/
/*create table summ_num_changes as*/
/*select*/
/*sum(case when Y_amzn_fflite_lag1 ne 0 then 1 else 0 end) as num_amzn_fflitechanges,*/
/*sum(case when Y_amzn_usopoly_lag1 ne 0 then 1 else 0 end) as num_amzn_usopolychanges,*/
/*sum(case when Y_clust1_lag1 ne 0 then 1 else 0 end) as num_3p1_usopolychanges,*/
/*sum(case when Y_clust2_lag1 ne 0 then 1 else 0 end) as num_3p2_usopolychanges,*/
/*sum(case when Y_clust3_lag1 ne 0 then 1 else 0 end) as num_3p3_usopolychanges,*/
/*sum(case when Y_clust4_lag1 ne 0 then 1 else 0 end) as num_3p4_usopolychanges,*/
/*sum(case when incr20per_amzn_fflite ne 0 then 1 else 0 end) as num_incr20peramzn_fflitechanges,*/
/*sum(case when decr20per_amzn_fflite ne 0 then 1 else 0 end) as num_decr20peramzn_fflitechanges,*/
/*sum(case when incr10per_amzn_fflite ne 0 then 1 else 0 end) as num_incr10peramzn_fflitechanges,*/
/*sum(case when decr10per_amzn_fflite ne 0 then 1 else 0 end) as num_decr10peramzn_fflitechanges,*/
/*sum(case when incr5per_amzn_fflite ne 0 then 1 else 0 end) as num_incr5peramzn_fflitechanges,*/
/*sum(case when decr5per_amzn_fflite ne 0 then 1 else 0 end) as num_decr5peramzn_fflitechanges,*/
/*sum(case when incr20per_amzn_usopoly ne 0 then 1 else 0 end) as num_incr20peramzn_usopolychanges,*/
/*sum(case when decr20per_amzn_usopoly ne 0 then 1 else 0 end) as num_decr20peramzn_usopolychanges,*/
/*sum(case when incr10per_amzn_usopoly ne 0 then 1 else 0 end) as num_incr10peramzn_usopolychanges,*/
/*sum(case when decr10per_amzn_usopoly ne 0 then 1 else 0 end) as num_decr10peramzn_usopolychanges,*/
/*sum(case when incr5per_amzn_usopoly ne 0 then 1 else 0 end) as num_incr5peramzn_usopolychanges,*/
/*sum(case when decr5per_amzn_usopoly ne 0 then 1 else 0 end) as num_decr5peramzn_usopolychanges,*/
/*sum(case when incr20per_3p_usopoly ne 0 then 1 else 0 end) as num_incr20per3p_usopolychanges,*/
/*sum(case when decr20per_3p_usopoly ne 0 then 1 else 0 end) as num_decr20per3p_usopolychanges,*/
/*sum(case when incr10per_3p_usopoly ne 0 then 1 else 0 end) as num_incr10per3p_usopolychanges,*/
/*sum(case when decr10per_3p_usopoly ne 0 then 1 else 0 end) as num_decr10per3p_usopolychanges,*/
/*sum(case when incr5per_3p_usopoly ne 0 then 1 else 0 end) as num_incr5per3p_usopolychanges,*/
/*sum(case when decr5per_3p_usopoly ne 0 then 1 else 0 end) as num_decr5per3p_usopolychanges*/
/*from final_gam_modeldata;*/
/*quit;*/
/**/
/*proc print data = summ_num_changes;*/
/*run;*/
/**/
/*proc freq data = final_gam_modeldata;*/
/*tables fourdcity_topbrandlag1*/
/*fflite_topbrandlag1 */
/*usopoly_topbrandlag1 */
/*fourdcity_bottombrandlag1*/
/*fflite_bottombrandlag1 */
/*usopoly_bottombrandlag1 ;*/
/*run;*/
